-
Notifications
You must be signed in to change notification settings - Fork 10
/
mps-load-avro-bq.sh
executable file
·44 lines (34 loc) · 1.13 KB
/
mps-load-avro-bq.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/bin/bash
#
# Upload the Avro files in avro-data/ to Cloud Storage and load each of them
# into a freshly recreated BigQuery dataset.
#
# File names are expected to look like <namespace>.<doctype>.<docver>.avro.
# Each file is loaded into the table <namespace>__<doctype>_v<docver>, with
# every hyphen in the name replaced by an underscore. Files in avro-data/ that
# do not end in .avro are still uploaded but are not loaded.
#
# Environment:
#   SKIP_EXISTING  If set (to any value, even an empty one), a document whose
#                  destination table already exists is skipped instead of
#                  being loaded again.
#
# Output: one progress line per document and a final summary line on stdout.
# The exit status does not reflect individual load failures; read the summary.

# Work from the parent of the directory containing this script so that the
# relative avro-data/ path resolves no matter where the script is called from.
cd "$(dirname "$0")/.." || exit

project_id=$(gcloud config get-value project)
if [[ -z "${project_id}" ]]; then
  # Without a project the bucket URL below would collapse to gs:///data.
  echo "no active gcloud project; run 'gcloud config set project <id>'" >&2
  exit 1
fi
dataset_id="test_avro"

# The bucket is named after the project; the glob is intentionally unquoted.
gsutil -m cp avro-data/* "gs://${project_id}/data"

# Start from an empty dataset: drop it together with its tables, then recreate.
bq rm -rf "${dataset_id}"
bq mk "${dataset_id}"

total=0
error=0
skip=0

# Make Ctrl-C end the whole script rather than only the command in flight.
trap 'exit' INT

for avro_path in avro-data/*.avro; do
  # An unmatched glob stays literal; skip it instead of loading a bogus name.
  [[ -e "${avro_path}" ]] || continue

  # Strip the directory and exactly the trailing ".avro" suffix.
  document=${avro_path##*/}
  document=${document%.avro}

  # Replace hyphens with underscores before generating the table name parts.
  bq_document=${document//-/_}
  IFS=. read -r namespace doctype docver rest <<< "${bq_document}"

  if [[ -z "${namespace}" || -z "${doctype}" || -z "${docver}" ]]; then
    echo "cannot derive a table name from ${document}" >&2
    error=$((error + 1))
    total=$((total + 1))
    continue
  fi
  table="${namespace}__${doctype}_v${docver}"

  # Ask for the exact table instead of grepping a listing: a substring match
  # would confuse e.g. _v1 with _v10, and listings are limited in length.
  # The lookup is only needed when skipping was requested.
  if [[ -n "${SKIP_EXISTING+x}" ]] \
    && bq show "${dataset_id}.${table}" > /dev/null 2>&1; then
    echo "skipping bq load for ${document}"
    skip=$((skip + 1))
    continue
  fi

  echo "running bq load for ${document}"
  if ! bq load --source_format=AVRO \
    --replace \
    "${dataset_id}.${table}" \
    "gs://${project_id}/data/${document}.avro"; then
    error=$((error + 1))
  fi
  total=$((total + 1))
done

echo "$((total - error))/$total loaded successfully, $skip skipped"