Skip to content

Commit

Permalink
Add options to convert a complete root data dir, dataset dir or a specific dataset version
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 612807114
  • Loading branch information
tomvdw authored and The TensorFlow Datasets Authors committed Mar 5, 2024
1 parent 1c914f3 commit 47f7a2e
Show file tree
Hide file tree
Showing 3 changed files with 382 additions and 92 deletions.
25 changes: 23 additions & 2 deletions tensorflow_datasets/scripts/cli/convert_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,32 @@

def add_parser_arguments(parser: argparse.ArgumentParser) -> None:
"""Add arguments for `convert_format` subparser."""
parser.add_argument(
'--root_data_dir',
type=str,
help=(
'Root data dir that contains all datasets. All datasets and all their'
' configs and versions that are in this folder will be converted.'
),
required=False,
)
parser.add_argument(
'--dataset_dir',
type=str,
help=(
'Path where the dataset to be converted is located. Converts all'
' configs and versions in this folder.'
),
required=False,
)
parser.add_argument(
'--dataset_version_dir',
type=str,
help=(
'Path where the dataset to be converted is located. Should include'
' config and version.'
),
required=True,
required=False,
)
parser.add_argument(
'--out_file_format',
Expand Down Expand Up @@ -80,9 +98,12 @@ def register_subparser(parsers: argparse._SubParsersAction) -> None:
add_parser_arguments(parser)
parser.set_defaults(
subparser_fn=lambda args: convert_format_utils.convert_dataset(
dataset_dir=args.dataset_dir,
out_dir=args.out_dir,
out_file_format=args.out_file_format,
dataset_dir=args.dataset_dir or None,
root_data_dir=args.root_data_dir or None,
dataset_version_dir=args.dataset_version_dir or None,
overwrite=args.overwrite,
use_beam=args.use_beam,
)
)

0 comments on commit 47f7a2e

Please sign in to comment.