Skip to content

Commit

Permalink
[GOBBLIN-2043] Set execute bit only for new folders in manifest distcp (
Browse files Browse the repository at this point in the history
#3922)

Set execute bit only for new folders in manifest distcp
  • Loading branch information
Will-Lo committed Apr 17, 2024
1 parent cb5a215 commit e79375a
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,20 @@
import com.google.gson.JsonSyntaxException;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
import org.apache.gobblin.commit.CommitStep;
import org.apache.gobblin.data.management.copy.entities.PostPublishStep;
import org.apache.gobblin.data.management.copy.entities.PrePublishStep;
import org.apache.gobblin.data.management.partition.FileSet;
import org.apache.gobblin.util.PathUtils;
import org.apache.gobblin.util.commit.DeleteFileCommitStep;
import org.apache.gobblin.util.commit.SetPermissionCommitStep;

Expand Down Expand Up @@ -130,10 +133,12 @@ public Iterator<FileSet<CopyEntity>> getFileSetIterator(FileSystem targetFs, Cop
copyEntities.add(copyableFile);

Path fromPath = srcFs.getFileStatus(fileToCopy).isDirectory() ? fileToCopy : fileToCopy.getParent();

ancestorOwnerAndPermissions.putAll(
CopyableFile.resolveReplicatedAncestorOwnerAndPermissionsRecursively(srcFs, fromPath,
new Path(commonFilesParent), configuration));
// Avoid duplicate calculation for the same ancestor
if (!ancestorOwnerAndPermissions.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fromPath).toString())) {
ancestorOwnerAndPermissions.putAll(
CopyableFile.resolveReplicatedAncestorOwnerAndPermissionsRecursively(srcFs, fromPath,
new Path(commonFilesParent), configuration));
}

if (existOnTarget && srcFile.isFile()) {
// this is to match the existing publishing behavior where we won't rewrite the target when it's already existed
Expand All @@ -146,6 +151,14 @@ public Iterator<FileSet<CopyEntity>> getFileSetIterator(FileSystem targetFs, Cop
}
}

// Only set permission for newly created folders on target
// To change permissions for existing dirs, expectation is to add the folder to the manifest
Set<String> parentFolders = new HashSet<>(ancestorOwnerAndPermissions.keySet());
for (String folder : parentFolders) {
if (targetFs.exists(new Path(folder))) {
ancestorOwnerAndPermissions.remove(folder);
}
}
Properties props = new Properties();
props.setProperty(SetPermissionCommitStep.STOP_ON_ERROR_KEY, "true");
CommitStep setPermissionCommitStep = new SetPermissionCommitStep(targetFs, ancestorOwnerAndPermissions, props);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.AccessControlException;

import lombok.Getter;
import lombok.extern.slf4j.Slf4j;

import org.apache.gobblin.commit.CommitStep;
Expand All @@ -38,6 +39,7 @@
*/
@Slf4j
public class SetPermissionCommitStep implements CommitStep {
@Getter
Map<String, OwnerAndPermission> pathAndPermissions;
private final URI fsUri;
public final boolean stopOnError;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ public void testFindFiles() throws IOException, URISyntaxException {
FileSet<CopyEntity> fileSet = fileSets.next();
Assert.assertEquals(fileSet.getFiles().size(), 3); // 2 files to copy + 1 post publish step
Assert.assertTrue(((PostPublishStep) fileSet.getFiles().get(2)).getStep() instanceof SetPermissionCommitStep);
SetPermissionCommitStep step = (SetPermissionCommitStep) ((PostPublishStep) fileSet.getFiles().get(2)).getStep();
Assert.assertEquals(step.getPathAndPermissions().keySet().size(), 2);
Mockito.verify(manifestReadFs, Mockito.times(1)).exists(manifestPath);
Mockito.verify(manifestReadFs, Mockito.times(1)).getFileStatus(manifestPath);
Mockito.verify(manifestReadFs, Mockito.times(1)).open(manifestPath);
Expand Down Expand Up @@ -178,6 +180,7 @@ private void setSourceAndDestFsMocks(FileSystem sourceFs, FileSystem destFs, Pat
Mockito.when(sourceFs.getUri()).thenReturn(SRC_FS_URI);
Mockito.when(manifestReadFs.getUri()).thenReturn(MANIFEST_READ_FS_URI);
Mockito.when(destFs.getUri()).thenReturn(DEST_FS_URI);
Mockito.when(destFs.exists(new Path("/tmp"))).thenReturn(true);
Mockito.when(sourceFs.getFileStatus(any(Path.class))).thenReturn(localFs.getFileStatus(new Path(tmpDir.toString())));
Mockito.when(sourceFs.exists(any(Path.class))).thenReturn(true);
Mockito.when(manifestReadFs.exists(any(Path.class))).thenReturn(true);
Expand Down

0 comments on commit e79375a

Please sign in to comment.