This repository has been archived by the owner on May 26, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset.ex
288 lines (252 loc) · 7.61 KB
/
dataset.ex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
defmodule SmartCity.Dataset do
@moduledoc """
Struct defining a dataset definition and functions for reading and writing dataset definitions to Redis.

In addition to the fields shown below, the struct carries a `version` field, which defaults to `"0.2"`.

```javascript
const Dataset = {
  "id": "", // UUID
  "business": { // Project Open Data Metadata Schema v1.1
    "dataTitle": "", // user friendly (dataTitle)
    "description": "",
    "keywords": [""],
    "modifiedDate": "",
    "orgTitle": "", // user friendly (orgTitle)
    "contactName": "",
    "contactEmail": "",
    "license": "",
    "rights": "",
    "homepage": "",
    "spatial": "",
    "temporal": "",
    "publishFrequency": "",
    "conformsToUri": "",
    "describedByUrl": "",
    "describedByMimeType": "",
    "parentDataset": "",
    "issuedDate": "",
    "language": "",
    "referenceUrls": [""],
    "categories": [""]
  },
  "technical": {
    "dataName": "", // ~r/[a-zA-Z_]+$/
    "orgId": "",
    "orgName": "", // ~r/[a-zA-Z_]+$/
    "systemName": "", // ${orgName}__${dataName}
    "schema": [
      {
        "name": "",
        "type": "",
        "description": ""
      }
    ],
    "sourceUrl": "",
    "protocol": "", // List of protocols to use. Defaults to nil. Can be [http1, http2]
    "authUrl": "",
    "sourceFormat": "",
    "sourceType": "", // remote|stream|batch
    "cadence": "",
    "sourceQueryParams": {
      "key1": "",
      "key2": ""
    },
    "transformations": [], // ?
    "validations": [], // ?
    "sourceHeaders": {
      "header1": "",
      "header2": ""
    },
    "authHeaders": {
      "header1": "",
      "header2": ""
    }
  },
  "_metadata": {
    "intendedUse": [],
    "expectedBenefit": []
  }
}
```
"""
alias SmartCity.Dataset.Business
alias SmartCity.Helpers
alias SmartCity.Dataset.Technical
alias SmartCity.Dataset.Metadata
alias SmartCity.Registry.Subscriber
# Identifier of a dataset. It is interpolated into the Redis key names used by this module.
@typep id :: term()
# A dataset definition: the struct version, the dataset id and the nested
# business, technical and metadata sections.
@type t :: %SmartCity.Dataset{
version: String.t(),
id: String.t(),
business: SmartCity.Dataset.Business.t(),
technical: SmartCity.Dataset.Technical.t(),
_metadata: SmartCity.Dataset.Metadata.t()
}
# Deriving Jason.Encoder lets the struct be passed straight to Jason.encode!/1,
# which is how it is serialised before being written to Redis.
@derive Jason.Encoder
defstruct version: "0.2", id: nil, business: nil, technical: nil, _metadata: nil
# Connection handed to every Redix call in this module. Module attributes are
# evaluated at compile time, so db_connection/0 runs once, while this module is compiled.
# NOTE(review): presumably this returns a registered connection name rather than a pid -- confirm.
@conn SmartCity.Registry.Application.db_connection()
defmodule NotFound do
@moduledoc """
Exception describing a lookup for a dataset id that has no stored definition.

`SmartCity.Dataset.get/1` returns it as the error reason and `SmartCity.Dataset.get!/1` raises it.
"""
defexception [:message]
end
@doc """
Builds a `SmartCity.Dataset` struct from a map or a JSON string.

The `business`, `technical` and `_metadata` sections are handed to
`SmartCity.Dataset.Business.new/1`, `SmartCity.Dataset.Technical.new/1` and
`SmartCity.Dataset.Metadata.new/1` respectively. When `_metadata` is absent an
empty map is used in its place.

## Parameters
- msg : the values of the struct to be created. Accepted forms:
  - map with string keys (must contain `"id"`)
  - map with atom keys (must contain `:id`, `:business` and `:technical`)
  - JSON string

## Returns
- `{:ok, dataset}` on success
- `{:error, reason}` when the JSON cannot be decoded, when building one of the
  nested structs raises (the rescued exception is the reason), or when `msg`
  has none of the accepted shapes
"""
@spec new(String.t() | map()) :: {:ok, SmartCity.Dataset.t()} | {:error, term()}
def new(msg) when is_binary(msg) do
  # NOTE(review): `keys: :atoms` creates atoms from the JSON payload and atoms are
  # never garbage collected -- confirm that only trusted input reaches this clause.
  case Jason.decode(msg, keys: :atoms) do
    {:ok, decoded} -> new(decoded)
    decode_error -> decode_error
  end
end

# String-keyed maps are converted to atom keys and fed back through new/1.
def new(%{"id" => _} = msg), do: new(Helpers.to_atom_keys(msg))

def new(%{id: id, business: business, technical: technical, _metadata: metadata}) do
  dataset = %__MODULE__{
    id: id,
    business: Business.new(business),
    technical: Technical.new(technical),
    _metadata: Metadata.new(metadata)
  }

  {:ok, dataset}
rescue
  # Any exception raised while building the nested structs becomes the error reason.
  exception -> {:error, exception}
end

# Messages without a `_metadata` section get an empty one.
def new(%{id: id, business: business, technical: technical}) do
  new(%{id: id, business: business, technical: technical, _metadata: %{}})
end

def new(msg), do: {:error, "Invalid registry message: #{inspect(msg)}"}
@doc """
Stores a dataset definition in Redis.

The dataset is first appended to the history list for its `id`, then saved as the
latest definition for that `id`. Afterwards `Subscriber.send_dataset_update/1` is
called with the id so that registry subscribers are notified.

Returns an `{:ok, id}` tuple where `id` is the dataset id. Raises if JSON encoding
or a Redis command fails.

## Parameters
- dataset: `SmartCity.Dataset` struct to be written.
"""
@spec write(SmartCity.Dataset.t()) :: {:ok, id()}
def write(%__MODULE__{id: dataset_id} = dataset) do
  add_to_history(dataset)

  encoded = Jason.encode!(dataset)
  Redix.command!(@conn, ["SET", latest_key(dataset_id), encoded])

  Subscriber.send_dataset_update(dataset_id)
  {:ok, dataset_id}
end
@doc """
Looks up the latest definition stored for `id`.

Returns `{:ok, dataset}` when a definition exists and can be turned into a
`SmartCity.Dataset`, otherwise `{:error, reason}`.
"""
@spec get(id()) :: {:ok, SmartCity.Dataset.t()} | {:error, term()}
def get(id) do
  # new/1 already answers with an ok/error tuple, so its result is returned as is.
  with {:ok, json} <- get_latest(id), do: new(json)
end
# Reads the raw JSON stored under the "latest" key for `id`. A missing key (Redis
# answers nil) is turned into a NotFound error; every other reply is passed through.
defp get_latest(id) do
  with {:ok, nil} <- Redix.command(@conn, ["GET", latest_key(id)]) do
    {:error, %NotFound{message: "no dataset with given id found -- ID: #{id}"}}
  end
end
@doc """
Same as `get/1`, but returns the dataset itself and raises the error reason on failure.
"""
@spec get!(id()) :: SmartCity.Dataset.t() | no_return()
def get!(id) do
  lookup = fn -> get(id) end
  handle_ok_error(lookup)
end
@doc """
Returns `{:ok, history}` with every version recorded for the dataset with the given id.

Entries come back in the order of the Redis history list; `write/1` appends to that
list, so the oldest version is first. Each entry is a map with atom keys:

- `:creation_ts` - ISO 8601 timestamp recorded when the version was written
- `:dataset` - that version as a `SmartCity.Dataset` struct

Returns `{:error, reason}` when the Redis command fails. Raises if a stored entry
cannot be decoded or converted into a dataset.
"""
@spec get_history(id()) ::
        {:ok, [%{creation_ts: String.t(), dataset: SmartCity.Dataset.t()}]} | {:error, term()}
def get_history(id) do
  with {:ok, raw_entries} <- Redix.command(@conn, ["LRANGE", history_key(id), "0", "-1"]) do
    entries =
      Enum.map(raw_entries, fn raw ->
        # Atom keys are kept so the shape of the returned maps stays unchanged for callers.
        entry = Jason.decode!(raw, keys: :atoms)
        %{entry | dataset: to_dataset(entry.dataset)}
      end)

    {:ok, entries}
  end
end
@doc """
See `get_history/1`. Returns the list of history entries (maps with `:creation_ts`
and `:dataset` keys) directly and raises on errors.
"""
@spec get_history!(id()) ::
        [%{creation_ts: String.t(), dataset: SmartCity.Dataset.t()}] | no_return()
def get_history!(id) do
  handle_ok_error(fn -> get_history(id) end)
end
@doc """
Returns `{:ok, datasets}` with the latest definition of every dataset in the system.

Keys that disappear between the key lookup and the value fetch are skipped.
Returns the error unchanged when a Redis command fails. Raises if a stored value
cannot be turned into a dataset.
"""
@spec get_all() :: {:ok, [SmartCity.Dataset.t()]} | {:error, term()}
def get_all() do
  with {:ok, values} <- keys_mget(latest_key("*")) do
    datasets =
      values
      # MGET answers nil for a key that no longer exists (for example, one removed
      # after KEYS ran); decoding nil would crash, so those slots are dropped.
      |> Enum.reject(&is_nil/1)
      |> Enum.map(&to_dataset/1)

    {:ok, datasets}
  end
end
@doc """
Same as `get_all/0`, but returns the list of datasets itself and raises on errors.
"""
@spec get_all!() :: [SmartCity.Dataset.t()] | no_return()
def get_all!() do
  fetch_all = fn -> get_all() end
  handle_ok_error(fetch_all)
end
@doc """
Returns `true` if the `sourceType` field of the dataset's `technical` section is `"stream"`, `false` otherwise.
"""
@spec is_stream?(SmartCity.Dataset.t()) :: boolean()
def is_stream?(%__MODULE__{technical: %{sourceType: source_type}}), do: source_type == "stream"
@doc """
Returns `true` if the `sourceType` field of the dataset's `technical` section is `"remote"`, `false` otherwise.
"""
@spec is_remote?(SmartCity.Dataset.t()) :: boolean()
def is_remote?(%__MODULE__{technical: %{sourceType: source_type}}), do: source_type == "remote"
@doc """
Returns `true` if the `sourceType` field of the dataset's `technical` section is `"batch"`, `false` otherwise.
"""
@spec is_batch?(SmartCity.Dataset.t()) :: boolean()
def is_batch?(%__MODULE__{technical: %{sourceType: source_type}}), do: source_type == "batch"
# Appends the dataset, together with the current UTC time in ISO 8601 form, to the
# end of the Redis history list for its id. Raises if encoding or the command fails.
defp add_to_history(%__MODULE__{id: id} = dataset) do
  timestamp = DateTime.to_iso8601(DateTime.utc_now())
  entry = Jason.encode!(%{creation_ts: timestamp, dataset: dataset})
  Redix.command!(@conn, ["RPUSH", history_key(id), entry])
end
# Redis key holding the latest definition of the dataset with the given id.
defp latest_key(id), do: "smart_city:dataset:latest:" <> to_string(id)
# Redis key of the list holding every recorded version of the dataset with the given id.
defp history_key(id), do: "smart_city:dataset:history:" <> to_string(id)
# Finds every key matching the pattern with KEYS and fetches all their values with a
# single MGET. An empty match list is returned as is (MGET is not sent), and so is
# any error reply.
defp keys_mget(pattern) do
  case Redix.command(@conn, ["KEYS", pattern]) do
    {:ok, matched} when matched != [] -> Redix.command(@conn, ["MGET" | matched])
    other -> other
  end
end
# Runs the given zero-arity function and unwraps its result: the value of an
# `{:ok, value}` tuple is returned, the reason of an `{:error, reason}` tuple is raised.
defp handle_ok_error(function) when is_function(function) do
  outcome = function.()

  case outcome do
    {:error, reason} -> raise reason
    {:ok, value} -> value
  end
end
# Converts a map, or a JSON string describing one, into a dataset struct.
# The match on `{:ok, _}` is deliberate: an invalid definition raises here.
defp to_dataset(attrs) when is_map(attrs) do
  {:ok, built} = new(attrs)
  built
end

defp to_dataset(json), do: to_dataset(Jason.decode!(json))
# Wraps a value in an `{:ok, value}` tuple; handy as the last step of a pipeline.
defp ok(value) do
  {:ok, value}
end
end