Skip to content

Commit

Permalink
[PFS-188] Improve put file UX and fix trailing slash bug (#9636)
Browse files Browse the repository at this point in the history
This commit is meant to address issues related to `pachctl put file`

Behavior before commit:

- `pachctl put file repo@branch:/ -f file.txt` will currently break
everything until you delete the corresponding commit
- `pachctl put file repo@branch:/dir -f file.txt` creates `/dir` as
expected, but `pachctl put file repo@branch:/dir/ -f file.txt` also
creates `/dir`
- `pachctl put file put-test@master:/dir/ -f dir2/file.txt --full-path`
creates `/dir`

Behavior after commit:

- `pachctl put file repo@branch:/ -f file.txt` creates `/file.txt`
- `pachctl put file repo@branch:/dir/ -f file.txt` creates
`/dir/file.txt`
- `pachctl put file put-test@master:/dir/ -f dir2/file.txt --full-path`
creates `/dir/dir2/file.txt`
  • Loading branch information
BOsterbuhr committed Jan 10, 2024
1 parent 6897d1b commit b9dd925
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 21 deletions.
39 changes: 18 additions & 21 deletions src/server/pfs/cmds/cmds.go
Original file line number Diff line number Diff line change
Expand Up @@ -1517,21 +1517,18 @@ func Cmds(mainCtx context.Context, pachCtx *config.Context, pachctlCfg *pachctl.
"\t- To compress files before uploading, use the `-c` flag \n" +
"\t- To define the maximum number of files that can be uploaded in parallel, use the `-p` flag \n" +
"\t- To append to an existing file, use the `-a` flag \n" +
"\n" +
"Other: \n" +
"\t- To enable progress bars, use the `-P` flag \n",
"\n",

Example: "\t- {{alias}} repo@master-f image.png \n" +
"\t- {{alias}} repo@master:/logs/log-1.txt \n" +
Example: "\t- {{alias}} repo@master -f image.png \n" +
"\t- {{alias}} -r repo@master -f my-directory \n" +
"\t- {{alias}} -r repo@branch:/path -f my-directory \n" +
"\t- {{alias}} -r repo@branch -f s3://my_bucket \n" +
"\t- {{alias}} repo@branch -f http://host/example.png \n" +
"\t- {{alias}} repo@branch:/dir -f http://host/example.png \n" +
"\t- {{alias}} repo@branch -r -f s3://my_bucket \n" +
"\t- {{alias}} repo@branch:/dir/ -f http://host/example.png \n" +
"\t- {{alias}} repo@branch -i file \n" +
"\t- {{alias}} repo@branch -i http://host/path \n" +
"\t- {{alias}} repo@branch -f -untar dir.tar \n" +
"\t- {{alias}} repo@branch -f -c image.png \n",
"\t- {{alias}} --untar repo@branch -f dir.tar \n" +
"\t- {{alias}} -c repo@branch -f image.png \n",

Run: cmdutil.RunFixedArgs(1, func(args []string) (retErr error) {
if !enableProgress {
Expand Down Expand Up @@ -1607,31 +1604,31 @@ func Cmds(mainCtx context.Context, pachCtx *config.Context, pachctlCfg *pachctl.
return c.WithModifyFileClient(file.Commit, func(mf client.ModifyFile) error {
for _, source := range sources {
source := source
target := source
if !fullPath {
target = filepath.Base(source)
}
if file.Path == "" {
// The user has not specified a path so we use source as path.
if source == "-" {
return errors.Errorf("must specify filename when reading data from stdin")
}
target := source
if !fullPath {
target = filepath.Base(source)
}
if err := putFileHelper(mf, joinPaths("", target), source, recursive, appendFile, untar); err != nil {
return err
}
} else if len(sources) == 1 {
// We have a single source and the user has specified a path,
// we use the path and ignore source (in terms of naming the file).
if err := putFileHelper(mf, file.Path, source, recursive, appendFile, untar); err != nil {
// we check if the path ends with a '/' and join with target if it does.
finalPath := file.Path
if strings.HasSuffix(file.Path, "/") {
finalPath = joinPaths(file.Path, target)
}
if err := putFileHelper(mf, finalPath, source, recursive, appendFile, untar); err != nil {
return err
}
} else {
// We have multiple sources and the user has specified a path,
// we use that path as a prefix for the filepaths.
target := source
if !fullPath {
target = filepath.Base(source)
}
if err := putFileHelper(mf, joinPaths(file.Path, target), source, recursive, appendFile, untar); err != nil {
return err
}
Expand All @@ -1644,7 +1641,7 @@ func Cmds(mainCtx context.Context, pachCtx *config.Context, pachctlCfg *pachctl.
putFile.Flags().StringSliceVarP(&filePaths, "file", "f", []string{"-"}, "Specify the file to be put; it can be a local file or a URL.")
putFile.Flags().StringVarP(&inputFile, "input-file", "i", "", "Specify file provided contains a list of files to be put (as paths or URLs).")
putFile.Flags().BoolVarP(&recursive, "recursive", "r", false, "Specify files should be recursively put into a directory.")
putFile.Flags().BoolVarP(&compress, "compress", "", false, "Specify data should be compressed during upload. This parameter might help you upload your uncompressed data, such as CSV files, to Pachyderm faster. Use 'compress' with caution, because if your data is already compressed, this parameter might slow down the upload speed instead of increasing.")
putFile.Flags().BoolVarP(&compress, "compress", "c", false, "Specify data should be compressed during upload. This parameter might help you upload your uncompressed data, such as CSV files, to Pachyderm faster. Use 'compress' with caution, because if your data is already compressed, this parameter might slow down the upload speed instead of increasing.")
putFile.Flags().IntVarP(&parallelism, "parallelism", "p", DefaultParallelism, "Set the maximum number of files that can be uploaded in parallel.")
putFile.Flags().BoolVarP(&appendFile, "append", "a", false, "Specify file contents should be appended to existing content from previous commits or previous calls to 'pachctl put file' within this commit.")
putFile.Flags().BoolVar(&enableProgress, "progress", isatty.IsTerminal(os.Stdout.Fd()) || isatty.IsCygwinTerminal(os.Stdout.Fd()), "Print progress bars.")
Expand All @@ -1656,7 +1653,7 @@ func Cmds(mainCtx context.Context, pachCtx *config.Context, pachctlCfg *pachctl.
if flag == "-f" || flag == "--file" || flag == "-i" || flag == "input-file" {
cs, cf := shell.FilesystemCompletion(flag, text, maxCompletions)
return cs, shell.AndCacheFunc(cf, shell.SameFlag(flag))
} else if flag == "" || flag == "-c" || flag == "--commit" || flag == "-o" || flag == "--append" {
} else if flag == "" || flag == "-c" || flag == "--compress" || flag == "-o" || flag == "--append" {
cs, cf := shell.FileCompletion(flag, text, maxCompletions)
return cs, shell.AndCacheFunc(cf, shell.SameFlag(flag))
}
Expand Down
157 changes: 157 additions & 0 deletions src/server/pfs/cmds/cmds_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"testing"
Expand Down Expand Up @@ -110,6 +111,162 @@ func TestCommit(t *testing.T) {
).Run())
}

func TestPutFileFullPathNoFilePath(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration tests in short mode")
}

ctx := pctx.TestContext(t)
env := realenv.NewRealEnv(ctx, t, dockertestenv.NewTestDBConfig(t).PachConfigOption)
mockInspectCluster(env)
c := env.PachClient

// Create a temporary directory and file with nested structure
tmpDir, err := os.MkdirTemp("", "pachyderm_test_put_file_full_path")
require.NoError(t, err)
defer os.RemoveAll(tmpDir)

nestedDir := filepath.Join(tmpDir, "nested/dir")
require.NoError(t, os.MkdirAll(nestedDir, 0755))

filePath := filepath.Join(nestedDir, "testfile.txt")
require.NoError(t, os.WriteFile(filePath, []byte("test data"), 0644))

repoName := tu.UniqueString("TestPutFileFullPathNoFilePath")

// Create repo and put file with fullPath flag, and verify
require.NoError(t, tu.PachctlBashCmd(t, c, `
pachctl create repo {{.repo}}
pachctl put file {{.repo}}@master -f {{.filePath}} --full-path
pachctl get file "{{.repo}}@master:{{.filePath}}" \
| match "test data"
`, "repo", repoName, "filePath", filePath).Run())
}

func TestPutFileFullPathWithFilePath(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration tests in short mode")
}

ctx := pctx.TestContext(t)
env := realenv.NewRealEnv(ctx, t, dockertestenv.NewTestDBConfig(t).PachConfigOption)
mockInspectCluster(env)
c := env.PachClient

// Create a temporary file in a nested directory
tmpDir, err := os.MkdirTemp("", "pachyderm_test_full_path_with_file_path")
require.NoError(t, err)
defer os.RemoveAll(tmpDir)

nestedDir := filepath.Join(tmpDir, "nested/dir")
require.NoError(t, os.MkdirAll(nestedDir, 0755))

filePath := filepath.Join(nestedDir, "testfile.txt")
require.NoError(t, os.WriteFile(filePath, []byte("test data"), 0644))

repoName := tu.UniqueString("TestPutFileFullPathWithFilePath")
targetFilePath := "specific/target/file.txt"

// Create repo, put file with file.Path and fullPath flag, and verify
require.NoError(t, tu.PachctlBashCmd(t, c, `
pachctl create repo {{.repo}}
pachctl put file {{.repo}}@master:{{.targetFilePath}} -f {{.filePath}} --full-path
pachctl get file "{{.repo}}@master:{{.targetFilePath}}" \
| match "test data"
`, "repo", repoName, "targetFilePath", targetFilePath, "filePath", filePath).Run())
}

func TestPutFileFullPathWithFilePathEndingSlash(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration tests in short mode")
}

ctx := pctx.TestContext(t)
env := realenv.NewRealEnv(ctx, t, dockertestenv.NewTestDBConfig(t).PachConfigOption)
mockInspectCluster(env)
c := env.PachClient

// Create a temporary file in a nested directory
tmpDir, err := os.MkdirTemp("", "pachyderm_test_full_path_with_file_path_ending_slash")
require.NoError(t, err)
defer os.RemoveAll(tmpDir)

nestedDir := filepath.Join(tmpDir, "nested/dir")
require.NoError(t, os.MkdirAll(nestedDir, 0755))

filePath := filepath.Join(nestedDir, "testfile.txt")
require.NoError(t, os.WriteFile(filePath, []byte("test data"), 0644))

repoName := tu.UniqueString("TestPutFileFullPathWithFilePathEndingSlash")
targetPrefix := "nested/dir/"

// Create repo, put file with file.Path ending with '/' and fullPath flag, and verify
require.NoError(t, tu.PachctlBashCmd(t, c, `
pachctl create repo {{.repo}}
pachctl put file {{.repo}}@master:{{.targetPrefix}} -f {{.filePath}} --full-path
pachctl get file "{{.repo}}@master:{{.targetPrefix}}{{.filePath}}" \
| match "test data"
`, "repo", repoName, "targetPrefix", targetPrefix, "filePath", filePath).Run())
}

func TestPutFileFilePathEndsWithSlashSingleSource(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration tests in short mode")
}

ctx := pctx.TestContext(t)
env := realenv.NewRealEnv(ctx, t, dockertestenv.NewTestDBConfig(t).PachConfigOption)
mockInspectCluster(env)
c := env.PachClient

// Create a temporary file
tmpFile, err := os.CreateTemp("", "pachyderm_test_put_file_single_source")
require.NoError(t, err)
defer os.Remove(tmpFile.Name())

require.NoError(t, os.WriteFile(tmpFile.Name(), []byte("test data"), 0644))

repoName := tu.UniqueString("TestPutFileFilePathEndsWithSlashSingleSource")
targetPrefix := "dir/"

// Create repo, put file with file.Path ending with '/', and verify
require.NoError(t, tu.PachctlBashCmd(t, c, `
pachctl create repo {{.repo}}
pachctl put file {{.repo}}@master:{{.targetPrefix}} -f {{.fileName}}
pachctl get file "{{.repo}}@master:{{.targetPrefix}}{{.baseFileName}}" \
| match "test data"
`, "repo", repoName, "targetPrefix", targetPrefix, "fileName", tmpFile.Name(), "baseFileName", filepath.Base(tmpFile.Name())).Run())
}

func TestPutFileFilePathWithoutSlashSingleSource(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration tests in short mode")
}

ctx := pctx.TestContext(t)
env := realenv.NewRealEnv(ctx, t, dockertestenv.NewTestDBConfig(t).PachConfigOption)
mockInspectCluster(env)
c := env.PachClient

// Create a temporary file
tmpFile, err := os.CreateTemp("", "pachyderm_test_put_file_single_source_no_slash")
require.NoError(t, err)
defer os.Remove(tmpFile.Name())

require.NoError(t, os.WriteFile(tmpFile.Name(), []byte("test data"), 0644))

repoName := tu.UniqueString("TestPutFileFilePathWithoutSlashSingleSource")
targetFilePath := "specific/target/path/file.txt"

// Create repo, put file with file.Path without ending '/', and verify
require.NoError(t, tu.PachctlBashCmd(t, c, `
pachctl create repo {{.repo}}
pachctl put file {{.repo}}@master:{{.targetFilePath}} -f {{.fileName}}
pachctl get file "{{.repo}}@master:{{.targetFilePath}}" \
| match "test data"
`, "repo", repoName, "targetFilePath", targetFilePath, "fileName", tmpFile.Name()).Run())
}

func TestPutFileTAR(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration tests in short mode")
Expand Down

0 comments on commit b9dd925

Please sign in to comment.