Skip to content

Commit

Permalink
command/sync: add --include flag (#600)
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmethakanbesel committed Aug 8, 2023
1 parent 5bb45b6 commit 4c30eb3
Show file tree
Hide file tree
Showing 14 changed files with 640 additions and 66 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -9,6 +9,7 @@
- Added `--show-fullpath` flag to `ls`. ([#596](https://github.com/peak/s5cmd/issues/596))
- Added `pipe` command. ([#182](https://github.com/peak/s5cmd/issues/182))
- Added `--show-progress` flag to `cp` to show a progress bar. ([#51](https://github.com/peak/s5cmd/issues/51))
- Added `--include` flag to `cp`, `rm` and `sync` commands. ([#516](https://github.com/peak/s5cmd/issues/516))

#### Improvements
- Implemented concurrent multipart download support for `cat`. ([#245](https://github.com/peak/s5cmd/issues/245))
Expand Down
23 changes: 23 additions & 0 deletions README.md
Expand Up @@ -301,6 +301,29 @@ folder hierarchy.
⚠️ Copying objects (from S3 to S3) larger than 5GB is not supported yet. We have
an [open ticket](https://github.com/peak/s5cmd/issues/29) to track the issue.

#### Using Exclude and Include Filters
`s5cmd` supports the `--exclude` and `--include` flags, which can be used to specify patterns for objects to be excluded or included in commands.

- The `--exclude` flag specifies objects that should be excluded from the operation. Any object that matches the pattern will be skipped.
- The `--include` flag specifies objects that should be included in the operation. Only objects that match the pattern will be handled.
- If both flags are used, `--exclude` has precedence over `--include`. This means that if an object URL matches any of the `--exclude` patterns, the object will be skipped, even if it also matches one of the `--include` patterns.
- The order of the flags does not affect the results (unlike `aws-cli`).

The command below will delete only objects that end with `.log`.

s5cmd rm --include "*.log" 's3://bucket/logs/2020/*'

The command below will delete all objects except those that end with `.log` or `.txt`.

s5cmd rm --exclude "*.log" --exclude "*.txt" 's3://bucket/logs/2020/*'

If you wish, you can use the same flag multiple times, as shown below. An object is included if it matches any of the `--include` patterns, so the command will download objects that start with `request` or end with `.log`.

s5cmd cp --include "*.log" --include "request*" 's3://bucket/logs/2020/*' .

Using a combination of `--include` and `--exclude` is also possible. The command below will only sync objects that end with `log` or `txt`, but will exclude those that start with `access_`. For example, `request.log` and `license.txt` will be included, while `access_log.txt` and `readme.md` will be excluded.

s5cmd sync --include "*log" --exclude "access_*" --include "*txt" 's3://bucket/logs/*' .
#### Select JSON object content using SQL

`s5cmd` supports the `SelectObjectContent` S3 operation, and will run your
Expand Down
34 changes: 29 additions & 5 deletions command/cp.go
Expand Up @@ -9,6 +9,7 @@ import (
"net/http"
"os"
"path/filepath"
"regexp"
"strings"
"sync"

Expand Down Expand Up @@ -97,13 +98,16 @@ Examples:
19. Copy all files from S3 bucket to another S3 bucket but exclude the ones starts with log
> s5cmd {{.HelpName}} --exclude "log*" "s3://bucket/*" s3://destbucket
20. Download an S3 object from a requester pays bucket
20. Copy all files from S3 bucket to another S3 bucket but only the ones starts with log
> s5cmd {{.HelpName}} --include "log*" "s3://bucket/*" s3://destbucket
21. Download an S3 object from a requester pays bucket
> s5cmd --request-payer=requester {{.HelpName}} s3://bucket/prefix/object.gz .
21. Upload a file to S3 with a content-type and content-encoding header
22. Upload a file to S3 with a content-type and content-encoding header
> s5cmd --content-type "text/css" --content-encoding "br" myfile.css.br s3://bucket/
22. Download the specific version of a remote object to working directory
23. Download the specific version of a remote object to working directory
> s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object .
`

Expand Down Expand Up @@ -169,6 +173,10 @@ func NewSharedFlags() []cli.Flag {
Name: "exclude",
Usage: "exclude objects with given pattern",
},
&cli.StringSliceFlag{
Name: "include",
Usage: "include objects with given pattern",
},
&cli.BoolFlag{
Name: "raw",
Usage: "disable the wildcard operations, useful with filenames that contains glob characters",
Expand Down Expand Up @@ -282,6 +290,7 @@ type Copy struct {
forceGlacierTransfer bool
ignoreGlacierWarnings bool
exclude []string
include []string
cacheControl string
expires string
contentType string
Expand All @@ -290,6 +299,10 @@ type Copy struct {
showProgress bool
progressbar progressbar.ProgressBar

// patterns
excludePatterns []*regexp.Regexp
includePatterns []*regexp.Regexp

// region settings
srcRegion string
dstRegion string
Expand Down Expand Up @@ -346,6 +359,7 @@ func NewCopy(c *cli.Context, deleteSource bool) (*Copy, error) {
forceGlacierTransfer: c.Bool("force-glacier-transfer"),
ignoreGlacierWarnings: c.Bool("ignore-glacier-warnings"),
exclude: c.StringSlice("exclude"),
include: c.StringSlice("include"),
cacheControl: c.String("cache-control"),
expires: c.String("expires"),
contentType: c.String("content-type"),
Expand Down Expand Up @@ -422,7 +436,13 @@ func (c Copy) Run(ctx context.Context) error {
isBatch = obj != nil && obj.Type.IsDir()
}

excludePatterns, err := createExcludesFromWildcard(c.exclude)
c.excludePatterns, err = createRegexFromWildcard(c.exclude)
if err != nil {
printError(c.fullCommand, c.op, err)
return err
}

c.includePatterns, err = createRegexFromWildcard(c.include)
if err != nil {
printError(c.fullCommand, c.op, err)
return err
Expand Down Expand Up @@ -455,7 +475,11 @@ func (c Copy) Run(ctx context.Context) error {
continue
}

if isURLExcluded(excludePatterns, object.URL.Path, c.src.Prefix) {
isExcluded, err := isObjectExcluded(object, c.excludePatterns, c.includePatterns, c.src.Prefix)
if err != nil {
printError(c.fullCommand, c.op, err)
}
if isExcluded {
continue
}

Expand Down
4 changes: 2 additions & 2 deletions command/du.go
Expand Up @@ -144,7 +144,7 @@ func (sz Size) Run(ctx context.Context) error {

var merror error

excludePatterns, err := createExcludesFromWildcard(sz.exclude)
excludePatterns, err := createRegexFromWildcard(sz.exclude)
if err != nil {
printError(sz.fullCommand, sz.op, err)
return err
Expand All @@ -161,7 +161,7 @@ func (sz Size) Run(ctx context.Context) error {
continue
}

if isURLExcluded(excludePatterns, object.URL.Path, sz.src.Prefix) {
if isURLMatched(excludePatterns, object.URL.Path, sz.src.Prefix) {
continue
}

Expand Down
44 changes: 0 additions & 44 deletions command/exclude.go

This file was deleted.

4 changes: 2 additions & 2 deletions command/ls.go
Expand Up @@ -188,7 +188,7 @@ func (l List) Run(ctx context.Context) error {

var merror error

excludePatterns, err := createExcludesFromWildcard(l.exclude)
excludePatterns, err := createRegexFromWildcard(l.exclude)
if err != nil {
printError(l.fullCommand, l.op, err)
return err
Expand All @@ -205,7 +205,7 @@ func (l List) Run(ctx context.Context) error {
continue
}

if isURLExcluded(excludePatterns, object.URL.Path, l.src.Prefix) {
if isURLMatched(excludePatterns, object.URL.Path, l.src.Prefix) {
continue
}

Expand Down
50 changes: 39 additions & 11 deletions command/rm.go
Expand Up @@ -3,6 +3,7 @@ package command
import (
"context"
"fmt"
"regexp"

"github.com/hashicorp/go-multierror"
"github.com/urfave/cli/v2"
Expand Down Expand Up @@ -38,17 +39,20 @@ Examples:
5. Delete all matching objects but exclude the ones with .txt extension or starts with "main"
> s5cmd {{.HelpName}} --exclude "*.txt" --exclude "main*" "s3://bucketname/prefix/*"
6. Delete all matching objects but only the ones with .txt extension or starts with "main"
> s5cmd {{.HelpName}} --include "*.txt" --include "main*" "s3://bucketname/prefix/*"
6. Delete the specific version of a remote object's content to stdout
7. Delete the specific version of a remote object's content to stdout
> s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object
7. Delete all versions of an object in the bucket
8. Delete all versions of an object in the bucket
> s5cmd {{.HelpName}} --all-versions s3://bucket/object
8. Delete all versions of all objects that starts with a prefix in the bucket
9. Delete all versions of all objects that starts with a prefix in the bucket
> s5cmd {{.HelpName}} --all-versions "s3://bucket/prefix*"
9. Delete all versions of all objects in the bucket
10. Delete all versions of all objects in the bucket
> s5cmd {{.HelpName}} --all-versions "s3://bucket/*"
`

Expand All @@ -66,6 +70,10 @@ func NewDeleteCommand() *cli.Command {
Name: "exclude",
Usage: "exclude objects with given pattern",
},
&cli.StringSliceFlag{
Name: "include",
Usage: "include objects with given pattern",
},
&cli.BoolFlag{
Name: "all-versions",
Usage: "list all versions of object(s)",
Expand Down Expand Up @@ -94,13 +102,30 @@ func NewDeleteCommand() *cli.Command {
return err
}

excludePatterns, err := createRegexFromWildcard(c.StringSlice("exclude"))
if err != nil {
printError(fullCommand, c.Command.Name, err)
return err
}

includePatterns, err := createRegexFromWildcard(c.StringSlice("include"))
if err != nil {
printError(fullCommand, c.Command.Name, err)
return err
}

return Delete{
src: srcUrls,
op: c.Command.Name,
fullCommand: fullCommand,

// flags
exclude: c.StringSlice("exclude"),
include: c.StringSlice("include"),

// patterns
excludePatterns: excludePatterns,
includePatterns: includePatterns,

storageOpts: NewStorageOpts(c),
}.Run(c.Context)
Expand All @@ -119,6 +144,11 @@ type Delete struct {

// flag options
exclude []string
include []string

// patterns
excludePatterns []*regexp.Regexp
includePatterns []*regexp.Regexp

// storage options
storageOpts storage.Options
Expand All @@ -135,12 +165,6 @@ func (d Delete) Run(ctx context.Context) error {
return err
}

excludePatterns, err := createExcludesFromWildcard(d.exclude)
if err != nil {
printError(d.fullCommand, d.op, err)
return err
}

objch := expandSources(ctx, client, false, d.src...)

var (
Expand All @@ -164,7 +188,11 @@ func (d Delete) Run(ctx context.Context) error {
continue
}

if isURLExcluded(excludePatterns, object.URL.Path, srcurl.Prefix) {
isExcluded, err := isObjectExcluded(object, d.excludePatterns, d.includePatterns, srcurl.Prefix)
if err != nil {
printError(d.fullCommand, d.op, err)
}
if isExcluded {
continue
}

Expand Down
4 changes: 2 additions & 2 deletions command/select.go
Expand Up @@ -191,7 +191,7 @@ func (s Select) Run(ctx context.Context) error {
}
}()

excludePatterns, err := createExcludesFromWildcard(s.exclude)
excludePatterns, err := createRegexFromWildcard(s.exclude)
if err != nil {
printError(s.fullCommand, s.op, err)
return err
Expand All @@ -217,7 +217,7 @@ func (s Select) Run(ctx context.Context) error {
continue
}

if isURLExcluded(excludePatterns, object.URL.Path, s.src.Prefix) {
if isURLMatched(excludePatterns, object.URL.Path, s.src.Prefix) {
continue
}

Expand Down
3 changes: 3 additions & 0 deletions command/sync.go
Expand Up @@ -66,6 +66,9 @@ Examples:
10. Sync all files to S3 bucket but exclude the ones with txt and gz extension
> s5cmd {{.HelpName}} --exclude "*.txt" --exclude "*.gz" dir/ s3://bucket
11. Sync all files to S3 bucket but include the only ones with txt and gz extension
> s5cmd {{.HelpName}} --include "*.txt" --include "*.gz" dir/ s3://bucket
`

func NewSyncCommandFlags() []cli.Flag {
Expand Down

0 comments on commit 4c30eb3

Please sign in to comment.