/
populate_datastore.py
199 lines (163 loc) · 6.3 KB
/
populate_datastore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# Copyright 2014 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script to populate datastore with system test data."""
from __future__ import print_function
import os
import string
import sys
import time
import uuid
import six
from google.cloud import datastore
# Key paths are flat (kind, name, kind, name, ...) tuples. Every path below
# starts with the single "Book"/"GoT" ancestor.
ANCESTOR = ("Book", "GoT")
RICKARD = ANCESTOR + ("Character", "Rickard")
EDDARD = RICKARD + ("Character", "Eddard")

# One key path per entry of CHARACTERS, in the same order.
KEY_PATHS = (
    RICKARD,
    EDDARD,
    ANCESTOR + ("Character", "Catelyn"),
) + tuple(
    EDDARD + ("Character", child)
    for child in ("Arya", "Sansa", "Robb", "Bran", "Jon Snow")
)

# Property values stored on each character entity. "family" is a plain string
# for everyone except Catelyn, whose value is a list of two names.
CHARACTERS = (
    dict(name=u"Rickard", family=u"Stark", appearances=0, alive=False),
    dict(name=u"Eddard", family=u"Stark", appearances=9, alive=False),
    dict(
        name=u"Catelyn",
        family=[u"Stark", u"Tully"],
        appearances=26,
        alive=False,
    ),
    dict(name=u"Arya", family=u"Stark", appearances=33, alive=True),
    dict(name=u"Sansa", family=u"Stark", appearances=31, alive=True),
    dict(name=u"Robb", family=u"Stark", appearances=22, alive=False),
    dict(name=u"Bran", family=u"Stark", appearances=25, alive=True),
    dict(name=u"Jon Snow", family=u"Stark", appearances=32, alive=True),
)
def print_func(message):
    """Print ``message`` unless output has been silenced.

    Nothing is printed when the ``GOOGLE_CLOUD_NO_PRINT`` environment
    variable is set to exactly ``"true"``.
    """
    silenced = os.environ.get("GOOGLE_CLOUD_NO_PRINT") == "true"
    if silenced:
        return
    print(message)
def add_large_character_entities(client=None):
    """Make sure the large-entity kind holds exactly ``TOTAL_OBJECTS`` entities.

    All entities of kind ``LargeCharacter`` in namespace
    ``LargeCharacterEntity`` are fetched. If their count is not exactly
    ``TOTAL_OBJECTS`` they are all deleted and a fresh set named
    ``character00000`` ... ``character01499`` is written.

    Note: ``client.namespace`` is set to ``LargeCharacterEntity`` and is left
    that way when the function returns.

    Args:
        client: Datastore client to use. When ``None``, a default
            ``datastore.Client()`` is created, matching the other helpers
            in this module.
    """
    TOTAL_OBJECTS = 1500
    NAMESPACE = "LargeCharacterEntity"
    KIND = "LargeCharacter"
    # 1500-character filler string; each entity stores it 26 times.
    MAX_STRING = (string.ascii_lowercase * 58)[:1500]
    # Can only do 500 operations in a transaction with an overall
    # size limit.
    ENTITIES_TO_BATCH = 25
    # Number of keys handed to a single delete_multi() call.
    DELETE_CHUNK = 500

    if client is None:
        client = datastore.Client()

    # Keys built below via client.key() pick up the client's namespace.
    client.namespace = NAMESPACE

    # Query used for all tests
    page_query = client.query(
        kind=KIND,
        namespace=NAMESPACE,
    )

    def put_objects(count):
        """Write entities 0..count-1, one transaction per small batch."""
        for start in range(0, count, ENTITIES_TO_BATCH):
            end = min(start + ENTITIES_TO_BATCH, count)
            with client.transaction() as xact:
                for index in range(start, end):
                    # The Cloud Datastore key for the new entity
                    task_key = client.key(KIND, f"character{index:05d}")

                    # Prepares the new entity
                    task = datastore.Entity(key=task_key)
                    task["name"] = f"{index:05d}"
                    task["family"] = "Stark"
                    task["alive"] = False
                    for letter in string.ascii_lowercase:
                        task[f"space-{letter}"] = MAX_STRING

                    # Saves the entity
                    xact.put(task)

    # Ensure we have TOTAL_OBJECTS entities for tests. If not, clean up the
    # kind and add new entities equal to TOTAL_OBJECTS.
    all_entities = list(page_query.fetch())
    if len(all_entities) == TOTAL_OBJECTS:
        return

    # Cleanup collection if not an exact match
    for offset in range(0, len(all_entities), DELETE_CHUNK):
        chunk = all_entities[offset:offset + DELETE_CHUNK]
        client.delete_multi([entity.key for entity in chunk])

    # Put objects
    put_objects(TOTAL_OBJECTS)
def add_characters(client=None):
    """Store every entry of ``CHARACTERS`` under its matching ``KEY_PATHS`` key.

    All puts happen inside a single transaction, and one line is reported
    through ``print_func`` per stored character.

    Args:
        client: Datastore client to use; a default ``datastore.Client()`` is
            created when ``None``.

    Raises:
        ValueError: If the last element of a key path differs from the
            ``"name"`` of the character paired with it.
    """
    # Fall back to a default-configured client.
    client = datastore.Client() if client is None else client

    with client.transaction() as xact:
        for path, attrs in six.moves.zip(KEY_PATHS, CHARACTERS):
            # The entity name in the key must match the stored "name" property.
            if path[-1] != attrs["name"]:
                mismatch = ("Character and key don't agree", path, attrs)
                raise ValueError(mismatch)

            key = client.key(*path)
            entity = datastore.Entity(key=key)
            entity.update(attrs)
            xact.put(entity)

            summary = (attrs["name"], attrs["family"])
            print_func("Adding Character %s %s" % summary)
def add_uid_keys(client=None):
    """Store 2 batches of 500 ``uuid_key`` entities named by random UUID4 strings.

    Each entity records the batch it was written in (``batch_num``) and its
    position within that batch (``seq_no``).

    Args:
        client: Datastore client to use; a default ``datastore.Client()`` is
            created when ``None``.
    """
    # Fall back to a default-configured client.
    client = datastore.Client() if client is None else client

    batch_count, per_batch = 2, 500
    for batch_num in range(batch_count):
        with client.batch() as batch:
            for seq_no in range(per_batch):
                random_name = str(uuid.uuid4())
                entity = datastore.Entity(key=client.key("uuid_key", random_name))
                entity.update({"batch_num": batch_num, "seq_no": seq_no})
                batch.put(entity)
def add_timestamp_keys(client=None):
    """Store 2 batches of 500 ``timestamp_key`` entities keyed by microsecond time.

    Each key ID is the current wall-clock time in microseconds. Each entity
    records the batch it was written in (``batch_num``) and its position
    within that batch (``seq_no``).

    Progress lines go through ``print_func`` so they honor the
    ``GOOGLE_CLOUD_NO_PRINT`` setting like the rest of this script.

    Args:
        client: Datastore client to use; a default ``datastore.Client()`` is
            created when ``None``.
    """
    if client is None:
        # Fall back to a default-configured client.
        client = datastore.Client()

    num_batches = 2
    batch_size = 500

    # IDs already used, so that no two entities share a key.
    timestamp_micros = set()
    for batch_num in range(num_batches):
        with client.batch() as batch:
            for seq_no in range(batch_size):
                print_func(
                    "time_time: batch: {}, sequence: {}".format(batch_num, seq_no)
                )
                now_micros = int(time.time() * 1e6)
                # Re-read the clock until it yields a value not seen before.
                while now_micros in timestamp_micros:
                    now_micros = int(time.time() * 1e6)
                timestamp_micros.add(now_micros)

                key = client.key("timestamp_key", now_micros)
                entity = datastore.Entity(key=key)
                entity["batch_num"] = batch_num
                entity["seq_no"] = seq_no
                batch.put(entity)
def main():
    """Populate the datastore according to the command-line flags.

    Recognized flags are ``--characters``, ``--uuid`` and ``--timestamps``.
    With no arguments all three are run. Arguments that match none of them
    are ignored. The populators always run in the order listed here,
    sharing one client.
    """
    client = datastore.Client()

    # An empty argument list selects the full default set.
    flags = sys.argv[1:] or ["--characters", "--uuid", "--timestamps"]

    populators = (
        ("--characters", add_characters),
        ("--uuid", add_uid_keys),
        ("--timestamps", add_timestamp_keys),
    )
    for flag, populate in populators:
        if flag in flags:
            populate(client)
# Run the population only when executed as a script, not when imported.
if __name__ == "__main__":
    main()