---
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
dataset:
  # The `dataset` block includes properties for your dataset that will be shown
  # to users of your data on the Google Cloud website.

  # Must be exactly the same name as the folder name your dataset.yaml is in.
  name: DATASET_FOLDER_NAME

  # A friendly, human-readable name of the dataset
  friendly_name: ~

  # A short, descriptive summary of the dataset.
  description: ~

  # A list of sources the dataset is derived from, using the YAML list syntax.
  dataset_sources: ~

  # A list of terms and conditions that users of the dataset should agree on,
  # using the YAML list syntax.
  terms_of_use: ~
resources:
  # A list of Google Cloud resources needed by your dataset. In principle, all
  # pipelines under a dataset should be able to share these resources.
  #
  # The currently supported resources are shown below. Use only the resources
  # you need, and delete the rest as needed by your pipeline.
  #
  # We will keep adding to the list below to support more Google Cloud resources
  # over time. If a resource you need isn't supported, please file an issue on
  # the repository.

  - type: bigquery_dataset
    # Google BigQuery dataset to namespace all tables managed by this folder
    #
    # Required Properties:
    #   dataset_id
    #
    # Optional Properties:
    #   friendly_name (A user-friendly name of the dataset)
    #   description (A user-friendly description of the dataset)
    #   location (The geographic location where the dataset should reside)
    dataset_id: DATASET_FOLDER_NAME
    description: BIGQUERY_DATASET_DESCRIPTION

  - type: storage_bucket
    # Google Cloud Storage Bucket that your pipelines need. Say, you need an
    # intermediate bucket to store data in-flight. Or you need a bucket to
    # archive or backup data generated by the pipelines.
    #
    # Because Cloud Storage bucket names must be globally unique, the `name`
    # specified below will be mapped to the unique bucket name:
    #
    #   `{bucket_name_prefix}-{name}`
    #
    # Where the bucket name prefix is supplied using the `--bucket-name-prefix`
    # parameter when running `scripts/generate_terraform.py`.
    #
    # Use hyphenated syntax, e.g. `some-prefix-123`, for the names. Note that
    # bucket names must not contain "google" or close misspellings, such as
    # "g00gle".
    #
    # Required Properties:
    #   name
    #   location
    #
    # Optional Properties:
    #   uniform_bucket_level_access (we suggest False for fine-grained access)
    name: YOUR-BUCKET-NAME
    location: US