From e294c1ecd9a985558e8447cf2fa954040016a23a Mon Sep 17 00:00:00 2001
From: Verma <62449205+vermas2012@users.noreply.github.com>
Date: Thu, 19 Aug 2021 11:43:23 -0400
Subject: [PATCH] feat: Add support for renaming tables in schema translator
 (#3185)

* feat: Add support for renaming tables in schema translator (#3154)

Remove any column family transformations and reduce the transformer to
support table renames only. We don't want to encourage column family
renames, as they have many implications. Mainly:

1. Read filters won't work, as they involve regexes.
2. Migration validation jobs won't work, as the column families will
   differ and the hashes won't match.

There are complex ways to solve these issues, but we don't see
use-cases for column family renames that justify complicating this
migration workflow around creation, querying, and validation.

(cherry picked from commit 7d49bba33c0b722e953ae20e4e393a7fd06a6567)

* Fix javax.annotation build issues.

* Fix the version number in the comments.

* Fix the bad merge.

* Incorporate PR feedback.
---
 .../bigtable-hbase-1.x-shaded/pom.xml         |   4 +
 .../bigtable-hbase-1.x-tools/pom.xml          |   7 +-
 .../hbase/tools/HBaseSchemaTranslator.java    | 170 +++++++++--
 .../tools/HBaseSchemaTransformerTest.java     | 268 ++++++++++++++++++
 .../HBaseSchemaTranslationOptionsTest.java    |   2 +
 .../tools/HBaseSchemaTranslatorTest.java      | 101 ++++++-
 6 files changed, 518 insertions(+), 34 deletions(-)
 create mode 100644 bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTransformerTest.java

diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-shaded/pom.xml b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-shaded/pom.xml
index c5ec2086e7..9ff785eec9 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-shaded/pom.xml
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-shaded/pom.xml
@@ -304,6 +304,10 @@ limitations under the License.
               <shadedPattern>com.google.bigtable.repackaged.org.codehaus</shadedPattern>
             </relocation>
+            <relocation>
+              <pattern>org.checkerframework</pattern>
+              <shadedPattern>com.google.bigtable.repackaged.org.checkerframework</shadedPattern>
+            </relocation>
diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/pom.xml b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/pom.xml
index f515ceda85..f906ff5de9 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/pom.xml
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/pom.xml
@@ -51,7 +51,7 @@
     <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
-      <version>30.0-jre</version>
+      <version>30.1.1-android</version>
     </dependency>
     <dependency>
       <groupId>org.slf4j</groupId>
@@ -80,6 +80,11 @@
       <version>4.13.2</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>javax.annotation</groupId>
+      <artifactId>javax.annotation-api</artifactId>
+      <version>1.3.2</version>
+    </dependency>
   </dependencies>
diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java
index 9fa6674bf8..4dab136ce0 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java
@@ -16,32 +16,37 @@ package com.google.cloud.bigtable.hbase.tools;
 
 import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import com.google.bigtable.repackaged.com.google.common.annotations.VisibleForTesting;
 import com.google.bigtable.repackaged.com.google.common.base.Preconditions;
 import com.google.bigtable.repackaged.com.google.gson.Gson;
+import com.google.bigtable.repackaged.com.google.gson.reflect.TypeToken;
 import com.google.cloud.bigtable.hbase.BigtableConfiguration;
 import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
 import com.google.cloud.bigtable.hbase.tools.ClusterSchemaDefinition.TableSchemaDefinition;
-import com.google.common.annotations.VisibleForTesting;
 import java.io.File;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.Reader;
 import java.io.Writer;
+import java.lang.reflect.Type;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 import javax.annotation.Nullable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
 import org.apache.log4j.BasicConfigurator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -52,8 +57,7 @@ *
  * <p>Execute the following command to copy the schema from HBase to Cloud Bigtable:
  *
  * <pre>
- *   mvn exec:java \
- *  -Dexec.mainClass=com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
+ * java -jar bigtable-hbase-1.x-tools-<version>-jar-with-dependencies.jar com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
  *  -Dhbase.zookeeper.quorum=$ZOOKEEPER_QUORUM \
  *  -Dhbase.zookeeper.property.clientPort=$ZOOKEEPER_PORT \
  *  -Dgoogle.bigtable.table.filter=$TABLE_NAME_REGEX \
@@ -69,8 +73,7 @@
  * </pre>
  *
  * <p>Run the tool from a host that can connect to HBase. Store HBase schema in a file:
  *
  * <pre>
- *   mvn exec:java \
- *  -Dexec.mainClass=com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
+ * java -jar bigtable-hbase-1.x-tools-<version>-jar-with-dependencies.jar com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
  *  -Dhbase.zookeeper.quorum=$ZOOKEEPER_QUORUM \
  *  -Dhbase.zookeeper.property.clientPort=$ZOOKEEPER_PORT \
  *  -Dgoogle.bigtable.table.filter=$TABLE_NAME_REGEX \
@@ -81,8 +84,7 @@
  * Bigtable using the schema file:
  *
  * <pre>
- *   mvn exec:java \
- *  -Dexec.mainClass=com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
+ * java -jar bigtable-hbase-1.x-tools-<version>-jar-with-dependencies.jar com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
  *  -Dgoogle.bigtable.input.filepath=$SCHEMA_FILE_PATH \
  *  -Dgoogle.bigtable.project.id=$PROJECT_ID \
  *  -Dgoogle.bigtable.instance.id=$INSTANCE_ID
@@ -98,23 +100,25 @@ public class HBaseSchemaTranslator {
   public static final String INPUT_FILE_KEY = "google.bigtable.input.filepath";
   public static final String OUTPUT_FILE_KEY = "google.bigtable.output.filepath";
   public static final String TABLE_NAME_FILTER_KEY = "google.bigtable.table.filter";
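+  // Optional: path to a JSON file mapping source HBase table names to destination Cloud
+  // Bigtable table names.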
+  public static final String SCHEMA_MAPPING_FILEPATH = "google.bigtable.schema.mapping.filepath";
 
   private static final Logger LOG = LoggerFactory.getLogger(HBaseSchemaTranslator.class);
 
   private final SchemaReader schemaReader;
+  private final SchemaTransformer schemaTransformer;
   private final SchemaWriter schemaWriter;
-  // TODO Add a schemaOverrider
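+  // The schema flows from the SchemaReader, through the SchemaTransformer, into the SchemaWriter.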
 
   @VisibleForTesting
   static class SchemaTranslationOptions {
 
-    String projectId;
-    String instanceId;
-    String zookeeperQuorum;
-    Integer zookeeperPort;
-    String inputFilePath;
-    String outputFilePath;
-    String tableNameFilter;
+    @Nullable String projectId;
+    @Nullable String instanceId;
+    @Nullable String zookeeperQuorum;
+    @Nullable Integer zookeeperPort;
+    @Nullable String inputFilePath;
+    @Nullable String outputFilePath;
+    @Nullable String tableNameFilter;
+    @Nullable String schemaMappingFilePath;
 
     @VisibleForTesting
     SchemaTranslationOptions() {}
@@ -160,6 +164,7 @@ public static SchemaTranslationOptions loadOptionsFromSystemProperties() {
       }
 
       options.tableNameFilter = System.getProperty(TABLE_NAME_FILTER_KEY);
+      options.schemaMappingFilePath = System.getProperty(SCHEMA_MAPPING_FILEPATH);
 
       // Ensure that the options are set properly
      // TODO It is possible to validate the options without creating the object, but it's less
@@ -175,8 +180,7 @@ public static SchemaTranslationOptions loadOptionsFromSystemProperties() {
   }
 
   /** Interface for reading HBase schema. */
-  interface SchemaReader {
-
+  private interface SchemaReader {
     ClusterSchemaDefinition readSchema() throws IOException;
   }
 
@@ -184,6 +188,7 @@ interface SchemaReader {
   * Reads HBase schema from a JSON file. The JSON file should be a representation of a {@link
   * ClusterSchemaDefinition} object.
    */
+  @VisibleForTesting
   static class FileBasedSchemaReader implements SchemaReader {
 
     private final String schemaFilePath;
@@ -200,6 +205,7 @@ public ClusterSchemaDefinition readSchema() throws IOException {
   }
 
   /** Reads the HBase schema by connecting to an HBase cluster. */
+  @VisibleForTesting
   static class HBaseSchemaReader implements SchemaReader {
 
     private final String tableFilterPattern;
@@ -245,14 +251,17 @@ private byte[][] getSplits(TableName table) throws IOException {
         return new byte[0][];
       }
 
-      byte[][] splits = new byte[regions.size()][];
-      int i = 0;
+      List<byte[]> splits = new ArrayList<>();
       for (HRegionInfo region : regions) {
-        splits[i] = region.getStartKey();
-        i++;
+        if (Arrays.equals(region.getStartKey(), HConstants.EMPTY_START_ROW)) {
+          // CBT client does not accept an empty row as a split.
+          continue;
+        }
+        splits.add(region.getStartKey());
       }
-      LOG.debug("Found {} splits for table {}.", splits.length, table.getNameAsString());
-      return splits;
+
+      LOG.debug("Found {} splits for table {}.", splits.size(), table.getNameAsString());
+      return splits.toArray(new byte[0][]);
     }
 
     @Override
@@ -273,7 +282,7 @@ public ClusterSchemaDefinition readSchema() throws IOException {
   /**
    * Interface for writing the HBase schema represented by a {@link ClusterSchemaDefinition} object.
    */
-  interface SchemaWriter {
+  private interface SchemaWriter {
 
     void writeSchema(ClusterSchemaDefinition schemaDefinition) throws IOException;
   }
@@ -282,6 +291,7 @@ interface SchemaWriter {
   * Writes the HBase schema into a file. The file contains the JSON representation of the {@link
   * ClusterSchemaDefinition} object.
    */
+  @VisibleForTesting
   static class FileBasedSchemaWriter implements SchemaWriter {
 
     private final String outputFilePath;
@@ -304,6 +314,7 @@ public void writeSchema(ClusterSchemaDefinition schemaDefinition) throws IOException {
    * Creates tables in Cloud Bigtable based on the schema provided by the {@link
    * ClusterSchemaDefinition} object.
    */
+  @VisibleForTesting
   static class BigtableSchemaWriter implements SchemaWriter {
 
     private final Admin btAdmin;
@@ -353,6 +364,15 @@ public HBaseSchemaTranslator(SchemaTranslationOptions options) throws IOException {
               options.zookeeperQuorum, options.zookeeperPort, options.tableNameFilter);
     }
 
+    if (options.schemaMappingFilePath != null) {
+
+      this.schemaTransformer =
+          JsonBasedSchemaTransformer.newSchemaTransformerFromJsonFile(
+              options.schemaMappingFilePath);
+    } else {
+      this.schemaTransformer = new NoopSchemaTransformer();
+    }
+
     if (options.outputFilePath != null) {
       this.schemaWriter = new FileBasedSchemaWriter(options.outputFilePath);
     } else {
@@ -360,16 +380,108 @@ public HBaseSchemaTranslator(SchemaTranslationOptions options) throws IOException {
     }
   }
 
+  /**
+   * Transforms the {@link ClusterSchemaDefinition} read by {@link SchemaReader} before writing it
+   * to {@link SchemaWriter}.
+   */
+  private interface SchemaTransformer {
+
+    ClusterSchemaDefinition transform(ClusterSchemaDefinition originalSchema)
+        throws IOException, DeserializationException;
+  }
+
+  /** No-op implementation of {@link SchemaTransformer}. Returns the original schema definition. */
+  private static class NoopSchemaTransformer implements SchemaTransformer {
+
+    @Override
+    public ClusterSchemaDefinition transform(ClusterSchemaDefinition originalSchema) {
+      return originalSchema;
+    }
+  }
+
+  /**
+   * Transforms the {@link ClusterSchemaDefinition} based on a provided JSON map. It can rename
+   * tables before writing them to {@link SchemaWriter}.
+   *
+   * <p>The JSON map should look like: { "SourceTable": "DestinationTable",
+   * "sourceTable-2": "DestinationTable-2" }
+   */
+  @VisibleForTesting
+  static class JsonBasedSchemaTransformer implements SchemaTransformer {
+
+    // Map from old-tableName -> new-tableName
+    @VisibleForTesting Map<String, String> tableNameMappings;
+
+    @VisibleForTesting
+    JsonBasedSchemaTransformer(Map<String, String> tableNameMappings) {
+      this.tableNameMappings = tableNameMappings;
+      LOG.info("Creating SchemaTransformer with schema mapping: {}", tableNameMappings);
+    }
+
+    public static JsonBasedSchemaTransformer newSchemaTransformerFromJsonFile(
+        String mappingFilePath) throws IOException {
+
+      Map<String, String> tableNameMappings = null;
+      Type mapType = new TypeToken<Map<String, String>>() {}.getType();
+
+      try (Reader jsonReader = new FileReader(mappingFilePath)) {
+        tableNameMappings = new Gson().fromJson(jsonReader, mapType);
+      }
+
+      if (tableNameMappings == null) {
+        throw new IllegalStateException(
+            "SchemaMapping file does not contain valid schema mappings");
+      }
+
+      return new JsonBasedSchemaTransformer(tableNameMappings);
+    }
+
+    @Override
+    public ClusterSchemaDefinition transform(ClusterSchemaDefinition originalSchema)
+        throws DeserializationException, IOException {
+      ClusterSchemaDefinition transformedSchema = new ClusterSchemaDefinition();
+
+      // Apply the transformations.
+      for (TableSchemaDefinition tableSchemaDefinition : originalSchema.tableSchemaDefinitions) {
+        String newTableName = tableSchemaDefinition.name;
+        HTableDescriptor tableDescriptor = tableSchemaDefinition.getHbaseTableDescriptor();
+        HTableDescriptor newTableDescriptor = tableDescriptor;
+
+        // Override the table name if it's present in the mapping file.
+        if (tableNameMappings.containsKey(newTableName)) {
+          newTableName = tableNameMappings.get(newTableName);
+          // Rename the table and copy all the other configs, including the column families.
+          newTableDescriptor =
+              new HTableDescriptor(TableName.valueOf(newTableName), tableDescriptor);
+          LOG.info("Renaming table {} to {}.", tableSchemaDefinition.name, newTableName);
+        }
+
+        // Finalize the transformed schema.
+        transformedSchema.addTableSchemaDefinition(
+            newTableDescriptor, tableSchemaDefinition.splits);
+      }
+      return transformedSchema;
+    }
+  }
+
   @VisibleForTesting
   HBaseSchemaTranslator(SchemaReader schemaReader, SchemaWriter schemaWriter) {
+    this(schemaReader, schemaWriter, new NoopSchemaTransformer());
+  }
+
+  @VisibleForTesting
+  HBaseSchemaTranslator(
+      SchemaReader schemaReader, SchemaWriter schemaWriter, SchemaTransformer schemaTransformer) {
     this.schemaReader = schemaReader;
     this.schemaWriter = schemaWriter;
+    this.schemaTransformer = schemaTransformer;
   }
 
-  public void translate() throws IOException {
+  public void translate() throws IOException, DeserializationException {
     ClusterSchemaDefinition schemaDefinition = schemaReader.readSchema();
     LOG.info("Read schema with {} tables.", schemaDefinition.tableSchemaDefinitions.size());
-    this.schemaWriter.writeSchema(schemaDefinition);
+
+    this.schemaWriter.writeSchema(schemaTransformer.transform(schemaDefinition));
   }
 
   /*
@@ -418,9 +530,13 @@ private static void usage(final String errorMsg) {
     System.err.println(
         "  Additionally, you can filter tables to create when using HBase as source");
     System.err.println("  -D " + TABLE_NAME_FILTER_KEY + "=<table-name-regex>");
+    System.err.println(
+        "  Optionally, the tables can be renamed by providing a JSON map. Example JSON "
+            + "{\"source-table\": \"destination-table\", \"namespace:source-table2\": \"namespace-destination-table2\"}.");
+    System.err.println("  -D " + SCHEMA_MAPPING_FILEPATH + "=/schema/mapping/file/path.json");
   }
 
-  public static void main(String[] args) throws IOException {
+  public static void main(String[] args) throws IOException, DeserializationException {
 
     // Configure the logger.
     BasicConfigurator.configure();
diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTransformerTest.java b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTransformerTest.java
new file mode 100644
index 0000000000..9858fdddb1
--- /dev/null
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTransformerTest.java
@@ -0,0 +1,268 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.hbase.tools;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import com.google.bigtable.repackaged.com.google.gson.Gson;
+import com.google.bigtable.repackaged.com.google.gson.JsonObject;
+import com.google.cloud.bigtable.hbase.tools.ClusterSchemaDefinition.TableSchemaDefinition;
+import com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator.JsonBasedSchemaTransformer;
+import com.google.common.collect.ImmutableMap;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class HBaseSchemaTransformerTest {
+
+  @Rule public TemporaryFolder tempFolder = new TemporaryFolder();
+
+  private String schemaFilePath;
+  private JsonBasedSchemaTransformer schemaTransformer;
+
+  @Before
+  public void before() throws IOException {
+    schemaFilePath = tempFolder.newFile().getPath();
+  }
+
+  ///////////////////////////////// Invalid cases //////////////////////////////////////
+  @Test
+  public void testBadJson() throws IOException {
+
+    // Setup bad JSON
+    FileWriter fw = new FileWriter(schemaFilePath);
+    fw.write("random JSON string");
+
+    // Execute the test method
+    try {
+      schemaTransformer =
+          JsonBasedSchemaTransformer.newSchemaTransformerFromJsonFile(schemaFilePath);
+      fail("Expected IllegalStateException due to bad JSON.");
+    } catch (IllegalStateException e) {
+      e.printStackTrace();
+    } catch (Exception e) {
+      fail("Expected IllegalStateException due to bad JSON but found " + e.toString());
+    }
+  }
+
+  //////////////////////////////////////// Happy cases ////////////////////////////////////
+  @Test
+  public void testParseJsonTables() throws IOException {
+    // Setup the schema file
+    JsonObject jsonObject = new JsonObject();
+    jsonObject.addProperty("old-table", "new-table");
+    // Make sure that the whole table name is matched and not just the prefix.
+    jsonObject.addProperty("old-table-2", "random-table-2");
+    try (Writer writer = new FileWriter(schemaFilePath)) {
+      new Gson().toJson(jsonObject, writer);
+    }
+
+    schemaTransformer =
+        JsonBasedSchemaTransformer.newSchemaTransformerFromJsonFile(schemaFilePath);
+
+    // Validate
+    Map<String, String> expectedTableMapping =
+        ImmutableMap.of("old-table", "new-table", "old-table-2", "random-table-2");
+    assertEquals(expectedTableMapping, schemaTransformer.tableNameMappings);
+  }
+
+  @Test
+  public void testParseJsonEmpty() throws IOException {
+    // Setup the schema file
+    JsonObject jsonObject = new JsonObject();
+    try (Writer writer = new FileWriter(schemaFilePath)) {
+      new Gson().toJson(jsonObject, writer);
+    }
+
+    // Parse the schema file
+    schemaTransformer =
+        JsonBasedSchemaTransformer.newSchemaTransformerFromJsonFile(schemaFilePath);
+    assertEquals(0, schemaTransformer.tableNameMappings.size());
+  }
+
+  @Test
+  public void testTransformWithNormalTable() throws DeserializationException, IOException {
+
+    // Setup a cluster schema
+    ClusterSchemaDefinition schemaDefinition = new ClusterSchemaDefinition();
+    TableSchemaDefinition tableSchemaDefinition = new TableSchemaDefinition();
+    tableSchemaDefinition.name = "test-table1";
+    tableSchemaDefinition.splits = new byte[3][];
+    tableSchemaDefinition.splits[0] = HConstants.EMPTY_BYTE_ARRAY;
+    tableSchemaDefinition.splits[1] = "first-split".getBytes();
+    tableSchemaDefinition.splits[2] = "second-split".getBytes();
+    HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf("test-table1"));
+    HColumnDescriptor columnDescriptor = new HColumnDescriptor("cf");
+    columnDescriptor.setMaxVersions(2).setTimeToLive(1000);
+    tableDescriptor.addFamily(columnDescriptor);
+    tableSchemaDefinition.tableDescriptor = tableDescriptor.toByteArray();
+    schemaDefinition.tableSchemaDefinitions.add(tableSchemaDefinition);
+
+    // Setup a transformer with a no-op mapping: the table is mapped to its own name.
+    Map<String, String> schemaMapping = new HashMap<>();
+    schemaMapping.put("test-table1", "test-table1");
+    JsonBasedSchemaTransformer transformer = new JsonBasedSchemaTransformer(schemaMapping);
+
+    ClusterSchemaDefinition transformedClusterSchema = transformer.transform(schemaDefinition);
+
+    // Verify the transformation
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).name,
+        schemaDefinition.tableSchemaDefinitions.get(0).name);
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).getHbaseTableDescriptor(),
+        schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor());
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).splits,
+        schemaDefinition.tableSchemaDefinitions.get(0).splits);
+  }
+
+  @Test
+  public void testTransformWithEmptyMappings() throws DeserializationException, IOException {
+
+    // Setup a cluster schema
+    ClusterSchemaDefinition schemaDefinition = new ClusterSchemaDefinition();
+    TableSchemaDefinition tableSchemaDefinition = new TableSchemaDefinition();
+    tableSchemaDefinition.name = "test-table1";
+    tableSchemaDefinition.splits = new byte[3][];
+    tableSchemaDefinition.splits[0] = HConstants.EMPTY_BYTE_ARRAY;
+    tableSchemaDefinition.splits[1] = "first-split".getBytes();
+    tableSchemaDefinition.splits[2] = "second-split".getBytes();
+    HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf("test-table1"));
+    HColumnDescriptor columnDescriptor = new HColumnDescriptor("cf");
+    columnDescriptor.setMaxVersions(2).setTimeToLive(1000);
+    tableDescriptor.addFamily(columnDescriptor);
+    tableSchemaDefinition.tableDescriptor = tableDescriptor.toByteArray();
+    schemaDefinition.tableSchemaDefinitions.add(tableSchemaDefinition);
+
+    // Setup a transformer with no mappings.
+    Map<String, String> schemaMapping = new HashMap<>();
+    JsonBasedSchemaTransformer transformer = new JsonBasedSchemaTransformer(schemaMapping);
+
+    ClusterSchemaDefinition transformedClusterSchema = transformer.transform(schemaDefinition);
+
+    // Verify the transformation
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).name,
+        schemaDefinition.tableSchemaDefinitions.get(0).name);
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).getHbaseTableDescriptor(),
+        schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor());
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).splits,
+        schemaDefinition.tableSchemaDefinitions.get(0).splits);
+  }
+
+  @Test
+  public void testTransformWithExtraMapping() throws DeserializationException, IOException {
+
+    // Setup a cluster schema
+    ClusterSchemaDefinition schemaDefinition = new ClusterSchemaDefinition();
+    TableSchemaDefinition tableSchemaDefinition = new TableSchemaDefinition();
+    tableSchemaDefinition.name = "test-table1";
+    tableSchemaDefinition.splits = new byte[3][];
+    tableSchemaDefinition.splits[0] = HConstants.EMPTY_BYTE_ARRAY;
+    tableSchemaDefinition.splits[1] = "first-split".getBytes();
+    tableSchemaDefinition.splits[2] = "second-split".getBytes();
+    HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf("test-table1"));
+    HColumnDescriptor columnDescriptor = new HColumnDescriptor("cf");
+    columnDescriptor.setMaxVersions(2).setTimeToLive(1000);
+    tableDescriptor.addFamily(columnDescriptor);
+    tableSchemaDefinition.tableDescriptor = tableDescriptor.toByteArray();
+    schemaDefinition.tableSchemaDefinitions.add(tableSchemaDefinition);
+
+    // Setup a transformer. The extra "test" mapping matches no table and should get discarded.
+    Map<String, String> schemaMapping = new HashMap<>();
+    schemaMapping.put("test-table1", "new-table1");
+    schemaMapping.put("test", "new-table2");
+    JsonBasedSchemaTransformer transformer = new JsonBasedSchemaTransformer(schemaMapping);
+
+    ClusterSchemaDefinition transformedClusterSchema = transformer.transform(schemaDefinition);
+    TableSchemaDefinition expectedTableSchema =
+        new TableSchemaDefinition(
+            new HTableDescriptor(
+                TableName.valueOf("new-table1"),
+                schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
+            tableSchemaDefinition.splits);
+
+    // Verify the transformation
+    Assert.assertEquals("new-table1", transformedClusterSchema.tableSchemaDefinitions.get(0).name);
+    Assert.assertEquals(
+        expectedTableSchema.getHbaseTableDescriptor(),
+        transformedClusterSchema.tableSchemaDefinitions.get(0).getHbaseTableDescriptor());
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).splits,
+        schemaDefinition.tableSchemaDefinitions.get(0).splits);
+  }
+
+  @Test
+  public void testTransformWithinCustomNamespaceTable()
+      throws DeserializationException, IOException {
+
+    // Setup a cluster schema
+    ClusterSchemaDefinition schemaDefinition = new ClusterSchemaDefinition();
+    TableSchemaDefinition tableSchemaDefinition = new TableSchemaDefinition();
+    tableSchemaDefinition.name = "ns:test-table1";
+    tableSchemaDefinition.splits = new byte[3][];
+    tableSchemaDefinition.splits[0] = HConstants.EMPTY_BYTE_ARRAY;
+    tableSchemaDefinition.splits[1] = "first-split".getBytes();
+    tableSchemaDefinition.splits[2] = "second-split".getBytes();
+    HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf("ns", "test-table1"));
+    HColumnDescriptor columnDescriptor = new HColumnDescriptor("cf");
+    columnDescriptor.setMaxVersions(2).setTimeToLive(1000);
+    tableDescriptor.addFamily(columnDescriptor);
+    tableSchemaDefinition.tableDescriptor = tableDescriptor.toByteArray();
+    schemaDefinition.tableSchemaDefinitions.add(tableSchemaDefinition);
+
+    // Setup a transformer that renames the namespaced table.
+    Map<String, String> schemaMapping = new HashMap<>();
+    schemaMapping.put("ns:test-table1", "new-table1");
+    JsonBasedSchemaTransformer transformer = new JsonBasedSchemaTransformer(schemaMapping);
+
+    ClusterSchemaDefinition transformedClusterSchema = transformer.transform(schemaDefinition);
+    TableSchemaDefinition expectedTableSchema =
+        new TableSchemaDefinition(
+            new HTableDescriptor(
+                TableName.valueOf("new-table1"),
+                schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
+            tableSchemaDefinition.splits);
+
+    // Verify the transformation
+    Assert.assertEquals("new-table1", transformedClusterSchema.tableSchemaDefinitions.get(0).name);
+    Assert.assertEquals(
+        expectedTableSchema.getHbaseTableDescriptor(),
+        transformedClusterSchema.tableSchemaDefinitions.get(0).getHbaseTableDescriptor());
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).splits,
+        schemaDefinition.tableSchemaDefinitions.get(0).splits);
+  }
+}
diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslationOptionsTest.java b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslationOptionsTest.java
index 55c7d30133..61301deed8 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslationOptionsTest.java
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslationOptionsTest.java
@@ -106,12 +106,14 @@ public void parseSchemaTranslationOptionsHbaseToFile() {
     System.setProperty(HBaseSchemaTranslator.ZOOKEEPER_PORT_KEY, "1080");
     System.setProperty(HBaseSchemaTranslator.TABLE_NAME_FILTER_KEY, "hbase-.*");
     System.setProperty(HBaseSchemaTranslator.OUTPUT_FILE_KEY, "/tmp/output");
+    System.setProperty(HBaseSchemaTranslator.SCHEMA_MAPPING_FILEPATH, "/tmp/schema_mapping.json");
 
     SchemaTranslationOptions options = SchemaTranslationOptions.loadOptionsFromSystemProperties();
     Assert.assertEquals("localhost", options.zookeeperQuorum);
     Assert.assertEquals((Integer) 1080, options.zookeeperPort);
     Assert.assertEquals("/tmp/output", options.outputFilePath);
     Assert.assertEquals("hbase-.*", options.tableNameFilter);
+    Assert.assertEquals("/tmp/schema_mapping.json", options.schemaMappingFilePath);
   }
 
   @Test
diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslatorTest.java b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslatorTest.java
index 6e8eaf8f46..eb8bcc3c1e 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslatorTest.java
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslatorTest.java
@@ -15,6 +15,7 @@
  */
 package com.google.cloud.bigtable.hbase.tools;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
 import static org.mockito.ArgumentMatchers.eq;
 
@@ -23,11 +24,15 @@
 import com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator.FileBasedSchemaReader;
 import com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator.FileBasedSchemaWriter;
 import com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator.HBaseSchemaReader;
+import com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator.JsonBasedSchemaTransformer;
 import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
@@ -41,6 +46,7 @@
 import org.junit.rules.TemporaryFolder;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
+import org.mockito.ArgumentCaptor;
 import org.mockito.Mock;
 import org.mockito.Mockito;
 import org.mockito.junit.MockitoJUnit;
@@ -148,7 +154,9 @@ public void testTranslateFromHBaseToBigtable() throws IOException, DeserializationException {
     Mockito.verify(btAdmin)
         .createTable(
            eq(schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
-            eq(schemaDefinition.tableSchemaDefinitions.get(0).splits));
+            // The first split is EMPTY_BYTE_ARRAY, which gets filtered out as CBT does not
+            // support empty splits.
+            eq(Arrays.copyOfRange(schemaDefinition.tableSchemaDefinitions.get(0).splits, 1, 3)));
     Mockito.verify(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(1).getHbaseTableDescriptor()),
@@ -158,6 +166,58 @@ public void testTranslateFromHBaseToBigtable() throws IOException, DeserializationException {
     Mockito.verify(hbaseAdmin).getTableRegions(TableName.valueOf("test-table2"));
   }
 
+  @Test
+  public void testTranslateFromHBaseToBigtableWithTransformation()
+      throws IOException, DeserializationException {
+    // Setup
+    Mockito.when(hbaseAdmin.listTables(eq("test.*"))).thenReturn(getTables());
+    Mockito.when(hbaseAdmin.getTableRegions(eq(TableName.valueOf("test-table1"))))
+        .thenReturn(getRegions(0));
+    Mockito.when(hbaseAdmin.getTableRegions(eq(TableName.valueOf("test-table2"))))
+        .thenReturn(getRegions(1));
+    ArgumentCaptor<HTableDescriptor> tableCaptor = ArgumentCaptor.forClass(HTableDescriptor.class);
+
+    Map<String, String> schemaMapping = new HashMap<>();
+    schemaMapping.put("test-table1", "new-test-table1");
+    // No-op renaming.
+    schemaMapping.put("test-table2", "test-table2");
+    // Not found in the schema, should get discarded.
+    schemaMapping.put("test", "new-test");
+
+    HBaseSchemaTranslator translator =
+        new HBaseSchemaTranslator(
+            new HBaseSchemaReader(hbaseAdmin, "test.*"),
+            new BigtableSchemaWriter(btAdmin),
+            new JsonBasedSchemaTransformer(schemaMapping));
+
+    // Call
+    translator.translate();
+
+    // Verify
+    Mockito.verify(btAdmin)
+        .createTable(
+            tableCaptor.capture(),
+            // The first split is EMPTY_BYTE_ARRAY, which gets filtered out as CBT does not
+            // support empty splits.
+            eq(Arrays.copyOfRange(schemaDefinition.tableSchemaDefinitions.get(0).splits, 1, 3)));
+    Mockito.verify(btAdmin)
+        .createTable(
+            tableCaptor.capture(), eq(schemaDefinition.tableSchemaDefinitions.get(1).splits));
+    HTableDescriptor transformedTable = tableCaptor.getAllValues().get(0);
+    assertEquals("new-test-table1", transformedTable.getNameAsString());
+    assertEquals("cf", transformedTable.getColumnFamilies()[0].getNameAsString());
+    // Cloud Bigtable only uses the GC policies from the HColumnFamily object.
+    assertEquals(2, transformedTable.getColumnFamilies()[0].getMaxVersions());
+    assertEquals(1000, transformedTable.getColumnFamilies()[0].getTimeToLive());
+    assertEquals(
+        schemaDefinition.tableSchemaDefinitions.get(1).getHbaseTableDescriptor(),
+        tableCaptor.getAllValues().get(1));
+
+    Mockito.verify(hbaseAdmin).listTables("test.*");
+    Mockito.verify(hbaseAdmin).getTableRegions(TableName.valueOf("test-table1"));
+    Mockito.verify(hbaseAdmin).getTableRegions(TableName.valueOf("test-table2"));
+  }
+
   @Test
   public void testTranslateHBaseToBigtableViaFile() throws IOException, DeserializationException {
     // Setup
@@ -186,7 +246,9 @@ public void testTranslateHBaseToBigtableViaFile() throws IOException, DeserializationException {
     Mockito.verify(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
-            eq(schemaDefinition.tableSchemaDefinitions.get(0).splits));
+            // The first split is EMPTY_BYTE_ARRAY, which gets filtered out as CBT does not
+            // support empty splits.
+            eq(Arrays.copyOfRange(schemaDefinition.tableSchemaDefinitions.get(0).splits, 1, 3)));
     Mockito.verify(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(1).getHbaseTableDescriptor()),
@@ -262,7 +324,9 @@ public void testBigtableCreateTableFails() throws IOException, DeserializationException {
         .when(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
-            eq(schemaDefinition.tableSchemaDefinitions.get(0).splits));
+            // The first split is EMPTY_BYTE_ARRAY, which gets filtered out as CBT does not
+            // support empty splits.
+            eq(Arrays.copyOfRange(schemaDefinition.tableSchemaDefinitions.get(0).splits, 1, 3)));
 
     HBaseSchemaTranslator translator =
         new HBaseSchemaTranslator(
@@ -282,16 +346,41 @@ public void testBigtableCreateTableFails() throws IOException, DeserializationException {
     Mockito.verify(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
-            eq(schemaDefinition.tableSchemaDefinitions.get(0).splits));
+            // The first split is EMPTY_BYTE_ARRAY, which gets filtered out as CBT does not
+            // support empty splits.
+            eq(Arrays.copyOfRange(schemaDefinition.tableSchemaDefinitions.get(0).splits, 1, 3)));
+    // Validate that the translator calls createTable for test-table2 even after the creation of
+    // test-table1 failed.
     Mockito.verify(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(1).getHbaseTableDescriptor()),
             eq(schemaDefinition.tableSchemaDefinitions.get(1).splits));
     Mockito.verify(hbaseAdmin).listTables(".*");
     Mockito.verify(hbaseAdmin).getTableRegions(TableName.valueOf("test-table1"));
-    // Validate that translator calls createTable for test-table-2 even after creation of
-    // test-table1 failed.
     Mockito.verify(hbaseAdmin).getTableRegions(TableName.valueOf("test-table2"));
   }
 
+  @Test
+  public void testHBaseSchemaTranslatorCreationFails()
+      throws IOException, DeserializationException {
+    // Setup: write invalid JSON to the schema-mapping file.
+    File schemaFile = tempFolder.newFile("schema.json");
+    FileWriter fileWriter = new FileWriter(schemaFile.getPath());
+    fileWriter.write("{This is invalid JSON}");
+
+    // Try creating the schema translator with invalid JSON, expect it to fail.
+    try {
+      HBaseSchemaTranslator translator =
+          new HBaseSchemaTranslator(
+              new HBaseSchemaReader(hbaseAdmin, ".*"),
+              new BigtableSchemaWriter(btAdmin),
+              JsonBasedSchemaTransformer.newSchemaTransformerFromJsonFile(schemaFile.getPath()));
+      fail("Expected IllegalStateException here.");
+    } catch (IllegalStateException e) {
+      // Expected.
+    } catch (Exception e) {
+      fail("Expected IllegalStateException but found: " + e.toString());
+    }
+  }
 }
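
For illustration, the end-to-end rename flow this patch enables might look as follows. This is a sketch that mirrors the usage strings above: the JSON map is the example printed by usage(), while the jar version and file paths are placeholders rather than values taken from the patch.

    $ cat /tmp/schema_mapping.json
    {
      "source-table": "destination-table",
      "namespace:source-table2": "namespace-destination-table2"
    }

    $ java -jar bigtable-hbase-1.x-tools-<version>-jar-with-dependencies.jar \
        com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
        -Dgoogle.bigtable.input.filepath=$SCHEMA_FILE_PATH \
        -Dgoogle.bigtable.schema.mapping.filepath=/tmp/schema_mapping.json \
        -Dgoogle.bigtable.project.id=$PROJECT_ID \
        -Dgoogle.bigtable.instance.id=$INSTANCE_ID

Tables named in the mapping file are created in Cloud Bigtable under their new names, with splits, column families, and GC policies copied unchanged; tables not named in the map are created under their original names.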