---
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
dataset:
  # The `dataset` block includes properties for your dataset that will be shown
  # to users of your data on the Google Cloud website.

  # Must be exactly the same name as the folder name your dataset.yaml is in.
  name: DATASET_FOLDER_NAME

  # A friendly, human-readable name of the dataset
  friendly_name: ~

  # A short, descriptive summary of the dataset.
  description: ~

  # A list of sources the dataset is derived from, using the YAML list syntax.
  dataset_sources: ~

  # A list of terms and conditions that users of the dataset should agree on,
  # using the YAML list syntax.
  terms_of_use: ~
resources:
  # A list of Google Cloud resources needed by your dataset. In principle, all
  # pipelines under a dataset should be able to share these resources.
  #
  # The currently supported resources are shown below. Use only the resources
  # you need, and delete the rest as needed by your pipeline.
  #
  # We will keep adding to the list below to support more Google Cloud resources
  # over time. If a resource you need isn't supported, please file an issue on
  # the repository.

  - type: bigquery_dataset
    # Google BigQuery dataset to namespace all tables managed by this folder
    #
    # Required Properties:
    #   dataset_id
    #
    # Optional Properties:
    #   friendly_name (A user-friendly name of the dataset)
    #   description (A user-friendly description of the dataset)
    #   location (The geographic location where the dataset should reside)
    dataset_id: DATASET_FOLDER_NAME
    description: BIGQUERY_DATASET_DESCRIPTION

  - type: storage_bucket
    # Google Cloud Storage Bucket that your pipelines need. Say, you need an
    # intermediate bucket to store data in-flight. Or you need a bucket to
    # archive or backup data generated by the pipelines.
    #
    # Because Cloud Storage bucket names must be globally unique, the `name`
    # specified below will be mapped to the unique bucket name:
    #
    #   `{bucket_name_prefix}-{name}`
    #
    # Where the bucket name prefix is supplied using the `--bucket-name-prefix`
    # parameter when running `scripts/generate_terraform.py`.
    #
    # Use hyphenated syntax, e.g. `some-prefix-123`, for the names. Note that
    # bucket names must not contain "google" or close misspellings, such as
    # "g00gle".
    #
    # Required Properties:
    #   name
    #   location
    #
    # Optional Properties:
    #   uniform_bucket_level_access (we suggest False for fine-grained access)
    name: YOUR-BUCKET-NAME
    location: US