/
base.py
executable file
·818 lines (691 loc) · 30.6 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Base classes for the Stoner package."""
__all__ = ["_evaluatable", "regexpDict", "string_to_type", "typeHintedDict", "metadataObject"]
from collections import OrderedDict
from collections.abc import MutableMapping, Mapping
import re
import copy
import datetime
from typing import (
Union,
Optional,
Any,
Dict,
Mapping as MappingType,
Tuple,
List,
Set,
Callable,
Sequence,
Iterable as IterableType,
Generator,
)
from dateutil import parser
import numpy as np
from numpy import NaN
import asteval
try:
import pandas as pd
except ImportError:
pd = None
from ..compat import string_types, int_types, _pattern_type
from ..tools import isiterable, isComparable
from .Typing import String_Types, RegExp, Filename
_asteval_interp = None
def _parse_date(string: str) -> datetime.datetime:
    """Parse *string* into a datetime using day-first (UK style) date ordering.

    Args:
        string (str):
            Text to hand to the dateutil parser.

    Returns:
        (datetime.datetime):
            Whatever ``dateutil.parser.parse`` produces for *string* when ambiguous
            dates are read with the day before the month.
    """
    day_first_info = parser.parserinfo(dayfirst=True)
    return parser.parse(string, parserinfo=day_first_info)
def literal_eval(string: str) -> Any:
    """Evaluate a string expression with a shared asteval interpreter.

    Args:
        string (str):
            String expression to be evaluated.

    Returns:
        (object):
            Evaluation result.

    Raises:
        ValueError:
            If the interpreter raises SyntaxError, ValueError, NameError, IndexError or TypeError.

    Notes:
        The interpreter is created lazily on the first call and cached in the module level
        ``_asteval_interp`` variable. Its symbol table is preloaded with ``np``, ``re``, ``NaN``,
        ``nan``, ``None`` and ``datetime``.
    """
    global _asteval_interp  # pylint: disable=W0603
    if _asteval_interp is None:
        preloaded = {"np": np, "re": re, "NaN": NaN, "nan": NaN, "None": None, "datetime": datetime}
        _asteval_interp = asteval.Interpreter(usersyms=preloaded)
    try:
        # NOTE(review): whether asteval actually raises when show_errors=False may depend on
        # the installed asteval version - confirm against the version in use.
        result = _asteval_interp(string, show_errors=False)
    except (SyntaxError, ValueError, NameError, IndexError, TypeError) as err:
        raise ValueError(f"Cannot interpret {string} as valid Python") from err
    return result
def string_to_type(value: String_Types) -> Any:
    """Work out a more natural Python type for a value given as a string.

    The stripped string is examined in this order:

    1.  The literal text ``None`` gives None.
    2.  If the first character is ``[`` or ``{`` the text is wrapped in ``list(...)`` or
        ``dict(...)`` respectively and evaluated with :py:func:`literal_eval`.
    3.  The words true, yes, on, false, no, off (any case) give a boolean.
    4.  Otherwise conversion is attempted as an int, a float, a date and finally a str.

    Args:
        value (string):
            String representation of the value.

    Returns:
        A Python object of the natural type for value. An empty string is returned unchanged.

    Raises:
        TypeError:
            If value is not a string.
    """
    if not isinstance(value, string_types):
        raise TypeError(f"Value must be a string not a {type(value)}")
    value = value.strip()
    if value == "None":
        return None
    candidates = ["list(" + value + ")", "dict(" + value + ")"]
    try:
        which = "[{".index(value[0])  # ValueError when the first character is neither [ nor {
        return literal_eval(candidates[which])  # pylint: disable=eval-used
    except (SyntaxError, ValueError):
        pass  # Not a list or dictionary, so fall through to the simpler types
    except IndexError:  # raised when a 0-length string is used
        return value
    lowered = value.lower()
    if lowered in ["true", "yes", "on", "false", "no", "off"]:
        return lowered in ["true", "yes", "on"]  # Boolean
    for converter in [int, float, _parse_date, str]:
        try:
            return converter(value)
        except (ValueError, OverflowError, TypeError):
            continue
    return None
class _evaluatable:
    """Marker type for values whose string form must be evaluated to obtain the Python object.

    The class carries no state or behaviour of its own; code elsewhere in this module only
    tests for instances of it with ``isinstance``.
    """
class regexpDict(OrderedDict):
    """An ordered dictionary that permits look-ups by regular expression.

    Keys are first tried literally. If that fails, string names are compiled as regular
    expressions and matched against the string keys, and integer names are used as an
    index into the sorted list of keys.

    Attributes:
        allowed_keys (tuple):
            Types that are acceptable for a new key created by ``__setitem__``.
    """

    allowed_keys: Tuple = (object,)

    def __lookup__(
        self, name: Union[str, RegExp], multiple: bool = False, exact: bool = False
    ) -> Union[Any, List[Any]]:
        """Lookup name and find a matching key or raise KeyError.

        Parameters:
            name (str, _pattern_type):
                The name to be searched for

        Keyword Arguments:
            multiple (bool):
                Return a single entry (default, False) or multiple entries
            exact (bool):
                Do not do a regular expression search, match the exact string only.

        Returns:
            Canonical key matching the specified name, or a list of keys if *multiple* is True.

        Raises:
            KeyError: if no key matches name.
        """
        ret = None
        try:  # name directly as key
            super().__getitem__(name)
            ret = name
        except (KeyError, TypeError) as err:  # Fall back to regular expression lookup
            if exact:
                raise KeyError(f"{name} not a key and exact match requested.") from err
            # Anything that is not a string, an integer or an already compiled pattern is
            # searched for via its repr. Compiled patterns must be left alone here, otherwise
            # they would be turned into text and never be used as a regular expression.
            if not isinstance(name, string_types + int_types) and not isinstance(name, _pattern_type):
                name = repr(name)
            nm = name
            if isinstance(name, string_types):
                try:
                    nm = re.compile(name)
                except re.error:  # Not a valid pattern, so nothing can match
                    pass
            elif isinstance(name, int_types):  # We can do this because we're a dict!
                try:
                    ret = sorted(self.keys())[name]
                except IndexError as index_err:
                    raise KeyError(f"{name} is not a match to any key.") from index_err
            if isinstance(nm, _pattern_type):
                # Prefer keys that match from their start, then fall back to a match anywhere
                ret = [n for n in self.keys() if isinstance(n, string_types) and nm.match(n)]
                if not ret:
                    ret = [n for n in self.keys() if isinstance(n, string_types) and nm.search(n)]
        if ret is None or isiterable(ret) and not ret:
            raise KeyError(f"{name} is not a match to any key.")
        if multiple:  # sort out returning multiple entries or not
            if not isinstance(ret, list):
                ret = [ret]
        elif isinstance(ret, list):
            ret = ret[0]
        return ret

    def __getitem__(self, name: Any) -> Any:
        """Add a lookup via regular expression when retrieving items."""
        return super().__getitem__(self.__lookup__(name))

    def __setitem__(self, name: Any, value: Any) -> None:
        """Overwrite an exactly matching key, or if not found add a new key.

        Raises:
            KeyError: if name is not an existing key and is not an instance of ``allowed_keys``.
        """
        try:
            key = self.__lookup__(name, exact=True)
        except KeyError as err:
            if not isinstance(name, self.allowed_keys):
                raise KeyError(f"{name} is not a match to any key.") from err
            key = name
        super().__setitem__(key, value)

    def __delitem__(self, name: Any) -> None:
        """Delete the key found by a literal or regular expression lookup of name."""
        super().__delitem__(self.__lookup__(name))

    def __contains__(self, name: Any) -> bool:
        """Return True if name either is an exact key or matches when interpreted as a regular expression."""
        try:
            self.__lookup__(name)
        except (KeyError, TypeError):
            return False
        return True

    def __eq__(self, other: Any) -> bool:
        """Define equals operation in terms of xor operation.

        Two mappings are equal when ``self ^ other`` is empty. The reverse comparison is only
        made when *other* is also a regexpDict, because other mappings (such as a plain dict)
        do not implement ``^`` and would raise a TypeError.
        """
        if not isinstance(other, Mapping):
            return NotImplemented
        if len(self ^ other) != 0:
            return False
        if isinstance(other, regexpDict):
            return len(other ^ self) == 0
        return True

    def __sub__(self, other: MappingType) -> "regexpDict":
        """Give the entries of this dictionary whose keys are not in other.

        The entries are returned in the order in which they appear in this dictionary.
        """
        if not isinstance(other, Mapping):
            return NotImplemented
        other_keys = set(other.keys())
        return type(self)({k: self[k] for k in self.keys() if k not in other_keys})

    def __xor__(self, other: MappingType) -> Union["regexpDict", Set[Any]]:
        """Give the difference between two mappings.

        Returns:
            If the sets of keys differ, the set of keys that are in only one of the mappings.
            Otherwise a new dictionary of the same type as self, mapping each key whose values
            differ to a tuple of (value in self, value in other).
        """
        if not isinstance(other, Mapping):
            return NotImplemented
        my_keys = set(self.keys())
        other_keys = set(other.keys())
        if my_keys != other_keys:  # Keys differ
            return my_keys ^ other_keys
        # Do values differ?
        ret = type(self)()
        for (key, mine), (_, theirs) in zip(sorted(self.items()), sorted(other.items())):
            if np.any(mine != theirs) and isComparable(mine, theirs):
                ret[key] = (mine, theirs)
        return ret

    def __or__(self, other):
        """Implement Python 3.9 style or operator to do a merge."""
        ret = self.copy()
        ret.update(other)
        return ret

    def __ior__(self, other):
        """Implement Python 3.9 style inplace or operator to do an update."""
        self.update(other)
        return self

    def has_key(self, name: Any) -> bool:
        """Key is definitely in dictionary as literal."""
        return super().__contains__(name)
class typeHintedDict(regexpDict):
    """Extends a :py:class:`regexpDict` to include type hints of what each key contains.

    The CM Physics Group at Leeds makes use of a standard file format that closely matches
    the :py:class:`DataFile` data structure. However, it is convenient for this file format
    to be ASCII text for ease of use with other programs. In order to represent metadata which
    can have arbitrary types, the LabVIEW code that generates the data file from our measurements
    adds a type hint string. The Stoner Python code can then make use of this type hinting to
    choose the correct representation for the metadata. The type hinting information is retained
    so that files output from Python will retain type hints to permit them to be loaded into
    strongly typed languages (such as LabVIEW).

    Attributes:
        _typehints (dict):
            The backing store for the type hint information
        __regexGetType (re):
            Used to extract the type hint from a string
        __regexSignedInt (re):
            matches type hint strings for signed integers
        __regexUnsignedInt (re):
            matches the type hint string for unsigned integers
        __regexFloat (re):
            matches the type hint strings for floats
        __regexBoolean (re):
            matches the type hint string for a boolean
        __regexString (re):
            matches the type hint string for a string variable
        __regexTimestamp (re):
            matches the type hint string for a timestamp
        __regexEvaluatable (re):
            matches the type hint string for a compound data type
        __types (dict):
            mapping of type hinted types to actual Python types
        __tests (list):
            pairs of the regex patterns and the actual python types they indicate
    """

    # Force metadata keys to be strings
    allowed_keys: Tuple = string_types
    # Match the name and the contents of the inner most {}
    __regexGetType: RegExp = re.compile(r"([^\{]*)\{([^\}]*)\}")
    # Matches all signed integers
    __regexSignedInt: RegExp = re.compile(r"^I\d+")
    # Match unsigned integers (e.g. U8, U32)
    __regexUnsignedInt: RegExp = re.compile(r"^U\d+")
    # Match floating point types
    __regexFloat: RegExp = re.compile(r"^(Extended|Double|Single)\sFloat")
    __regexBoolean: RegExp = re.compile(r"^Boolean")
    __regexString: RegExp = re.compile(r"^(String|Path|Enum)")
    __regexTimestamp: RegExp = re.compile(r"Timestamp")
    # NOTE(review): the empty alternative (``||``) makes this pattern match every type hint, so
    # any hint not matched by an earlier test is treated as evaluatable. Left unchanged because
    # existing behaviour may rely on it - confirm whether this is intended.
    __regexEvaluatable: RegExp = re.compile(r"^(Cluster||\d+D Array|List)")

    # This is the inverse of the __tests below - this gives
    # the string type for standard Python classes
    __types: Dict[str, type] = {  # pylint: disable=used-before-assignment
        # Key order does matter here! (bool must be tested before int)
        "Boolean": bool,
        "I32": int,
        "Double Float": float,
        "Cluster": dict,
        "AnonCluster": tuple,
        "Array": np.ndarray,
        "List": list,
        "Timestamp": datetime.datetime,
        "String": str,
    }

    # This is used to work out the correct python class for
    # some string types
    __tests: List[Tuple] = [
        (__regexSignedInt, int),
        (__regexUnsignedInt, int),
        (__regexFloat, float),
        (__regexBoolean, bool),
        (__regexTimestamp, datetime.datetime),
        (__regexString, str),
        (__regexEvaluatable, _evaluatable()),
    ]

    def __init__(self, *args: Any, **kargs: Any) -> None:
        """Construct the typeHintedDict.

        Args:
            *args, **kargs:
                Pass any parameters through to the parent class constructor.

        Calls the parent constructor, then runs through the keys of the
        created dictionary and either uses the string type embedded in
        the keyname to generate the type hint (and remove the
        embedded string type from the keyname) or determines the likely
        type hint from the value of the dict element.
        """
        self._typehints = OrderedDict()
        super().__init__(*args, **kargs)
        # Check through all the keys and see if they contain type hints. If they do,
        # move them to the _typehints dict. Iterate over a copy as keys are replaced.
        for key in list(self.keys()):
            value = super().__getitem__(key)
            super().__delitem__(key)
            # __setitem__ has the logic to handle embedded type hints correctly
            self[key] = value

    @property
    def types(self) -> Dict:
        """Return the dictionary of value types."""
        return self._typehints

    def findtype(self, value: Any) -> str:
        """Determine the correct string type to return for common python classes.

        Args:
            value (any):
                The data value to determine the type hint for.

        Returns:
            A type hint string. None gives "Void" and values of an unrecognised
            class give "Invalid Type".

        Note:
            Understands booleans, strings, integers, floats and np
            arrays (as arrays), and dictionaries and tuples (as clusters).
        """
        if value is None:
            return "Void"
        typ = "Invalid Type"
        for name, python_type in self.__types.items():
            if not isinstance(value, python_type):
                continue
            if name in ["Cluster", "AnonCluster"]:
                if isinstance(value, dict):
                    elements = [self.findtype(value[k]) for k in value]
                else:
                    elements = [self.findtype(v) for v in value]
                typ = "Cluster (" + ",".join(elements) + ")"
            elif name == "Array":
                # Use a single element of the same dtype to find the element type hint
                z = np.zeros(1, dtype=value.dtype)
                typ = f"{value.ndim}D Array ({self.findtype(z[0])})"
            else:
                typ = name
            break
        return typ

    def __mungevalue(self, typ: str, value: Any) -> Any:
        """Based on a string type typ, return value cast to an appropriate python class.

        Args:
            typ (string):
                is a string representing the type
            value (any):
                is the data value to be munged into the correct class

        Returns:
            Returns the munged data value

        Detail:
            The class has a series of precompiled regular
            expressions that will match type strings, a list of these has been
            constructed with the matching Python classes. These
            are tested in turn and if the type string matches the constructor of
            the associated python class is called with value as its argument.
            Evaluatable, timestamp, boolean and string types get special handling.
        """
        ret = None
        if typ == "Invalid Type":  # Short circuit here
            return repr(value)
        for regexp, valuetype in self.__tests:
            if regexp.search(typ) is None:
                continue
            if isinstance(valuetype, _evaluatable):
                try:
                    # we've got a string already don't need repr
                    if isinstance(value, string_types):
                        ret = literal_eval(value)
                    else:
                        ret = literal_eval(repr(value))  # pylint: disable=eval-used
                except ValueError:  # Oops just keep string format
                    ret = str(value)
                except SyntaxError:
                    ret = ""
                break
            if issubclass(valuetype, datetime.datetime):
                if isinstance(value, datetime.datetime):
                    ret = value  # Already a datetime object, the parser only accepts text
                    break
                try:
                    ret = parser.parse(value)
                except ValueError:
                    try:
                        ret = literal_eval(value)
                    except ValueError:
                        ret = str(value)
                break
            if issubclass(valuetype, str) and value == "None":
                ret = None
                break
            if valuetype is bool and isinstance(value, string_types):
                # bool() of any non-empty string is True, so the text forms of False
                # (the same words that string_to_type recognises) are handled explicitly.
                ret = bool(value) and value.strip().lower() not in ("false", "no", "off")
                break
            ret = valuetype(value)
            break
        else:
            ret = str(value)
            try:
                ret = _parse_date(ret)
            except (ValueError, OverflowError):
                pass
        return ret

    def _get_name_(self, name: Union[str, RegExp]) -> Tuple[str, Optional[str]]:
        """Check a string name for an embedded type hint and strips it out.

        Args:
            name (string):
                String containing the name with possible type hint embedded

        Returns:
            (name, typehint) (tuple):
                A tuple containing just the name of the metadata and (if found)
                the type hint string, otherwise None.
        """
        search = str(name)
        m = self.__regexGetType.search(search)
        if m is not None:
            return m.group(1), m.group(2)
        if not isinstance(name, string_types + int_types):
            return search, None
        return name, None

    def __getitem__(self, name: Union[str, RegExp]) -> Any:
        """Check whether its been given a typehint in the item name and deals with it appropriately.

        Args:
            name (string):
                metadata key to retrieve

        Returns:
            The metadata value if one key matches, or a dict of key to value if several keys match.
        """
        key = name
        (name, typehint) = self._get_name_(name)
        name = self.__lookup__(name, True)
        value = [super(typeHintedDict, self).__getitem__(nm) for nm in name]  # pylint: disable=super-with-arguments
        if typehint is not None:
            value = [self.__mungevalue(typehint, v) for v in value]
        if len(value) == 0:  # pylint: disable=len-as-condition
            raise KeyError(f"{key} is not a valid key even when interpreted as a sregular expression!")
        if len(value) == 1:
            return value[0]
        return dict(zip(name, value))

    def __setitem__(self, name: Union[str, RegExp], value: Any) -> None:
        """Set an item in the dict, checking the key for an embedded type hint or inspecting the value as necessary.

        Arguments:
            name (string):
                The metadata keyname
            value (any):
                The value to store in the metadata string

        Note:
            If you provide an embedded type string it is your responsibility
            to make sure that it correctly describes the actual data
            typeHintedDict does not verify that your data and type string are
            compatible.
        """
        name, typehint = self._get_name_(name)
        if typehint is not None:
            self._typehints[name] = typehint
            if value is None:  # Empty data so reset to string and set empty
                super().__setitem__(name, "")
                self._typehints[name] = "String"
            else:
                super().__setitem__(name, self.__mungevalue(typehint, value))
        else:
            if isinstance(value, string_types):
                value = string_to_type(value)
            self._typehints[name] = self.findtype(value)
            super().__setitem__(name, value)

    def __delitem__(self, name: Union[str, RegExp]) -> None:
        """Delete the specified key and its type hint.

        Args:
            name (string): The keyname to be deleted
        """
        name = self._get_name_(name)[0]
        name = self.__lookup__(name)
        del self._typehints[name]
        super().__delitem__(name)

    def __repr__(self) -> str:
        """Create a text representation of the dictionary with type data."""
        ret = [f"{repr(key)}:{self.type(key)}:{repr(self[key])}" for key in sorted(self)]
        return "\n".join(ret)

    def copy(self) -> "typeHintedDict":
        """Provide a copy method that is aware of the type hinting strings.

        Each value is copied with :py:func:`copy.copy` and stored directly, together with
        its existing type hint, so that values are not re-interpreted during the copy.

        Returns:
            A copy of the current typeHintedDict
        """
        cls = type(self)
        ret = cls()
        for k, val in self.items():
            ret._typehints[k] = self._typehints[k]
            # Bypass our own __setitem__ so that the value and the hint are kept as they are
            super(typeHintedDict, ret).__setitem__(k, copy.copy(val))
        return ret

    def filter(self, name: Union[str, RegExp, Callable]) -> None:
        """Filter the dictionary keys by name.

        Reduce the metadata dictionary leaving only keys satisfied by name.

        Keyword Arguments:
            name (str or callable):
                either a str that must be contained in the key or a callable function that
                takes the metadata key as an argument and returns True or False

        Raises:
            ValueError: if name is neither a string nor callable (and the dictionary is not empty).
        """
        rem = []
        for k in self.keys():
            if isinstance(name, string_types):
                if name not in k:
                    rem.append(k)
            elif hasattr(name, "__call__"):
                if not name(k):
                    rem.append(k)
            else:
                raise ValueError("name must be a string or a function")
        # Delete afterwards so that the dictionary is not changed whilst being iterated over
        for k in rem:
            del self[k]

    def type(self, key: Union[str, RegExp, Sequence[Union[str, RegExp]]]) -> Union[str, List[str]]:
        """Return the typehint for the given key(s).

        This simply looks up the type hinting dictionary for each key it is given.

        Args:
            key (string or sequence of strings):
                Either a single string key or a iterable type containing keys

        Returns:
            The string type hint (or a list of string type hints)
        """
        if isinstance(key, string_types):
            return self._typehints[key]
        try:
            return [self._typehints[x] for x in key]
        except TypeError:  # key was not iterable, so treat it as a single key
            return self._typehints[key]

    def export(self, key: Union[str, RegExp]) -> str:
        """Export a single metadata value to a string representation with type hint.

        In the ASCII based file format, the type hinted metadata is represented
        in the first column of a tab delimited text file as a series of lines
        with format keyname{typehint}=string_value.

        Args:
            key (string):
                The metadata key to export

        Returns:
            A string of the format : key{type hint}=value
        """
        value = self[key]
        if isinstance(value, string_types):  # avoid string within string problems and backslash overdrive
            return f"{key}{{{self.type(key)}}}={value}"
        return f"{key}{{{self.type(key)}}}={repr(value)}"

    def export_all(self) -> List[str]:
        """Return all the entries in the typeHintedDict as a list of exported lines.

        Returns:
            (list of str): A list of exported strings

        Notes:
            The entries are returned in the order in which the dictionary iterates over its keys.
        """
        return [self.export(x) for x in self]

    def import_all(self, lines: List[str]) -> None:
        """Read multiple lines of strings and tries to import keys from them.

        Args:
            lines (list of str):
                The lines of metadata values to import.
        """
        for line in lines:
            self.import_key(line)

    def import_key(self, line: str) -> None:
        """Import a single key from a string like key{type hint}=value.

        This is the inverse of the :py:meth:`typeHintedDict.export` method.

        Args:
            line (str):
                The string line to be interpreted as a key-value pair.
        """
        # Only the first = separates key and value, any later = belongs to the value string
        k, _, v = line.partition("=")
        self[k] = v
class metadataObject(MutableMapping):
    """Represent some sort of object that has metadata stored in a :py:class:`Stoner.Core.typeHintedDict` object.

    The mapping interface (item access, length, iteration, keys, items and values) is passed
    through to the metadata dictionary.

    Attributes:
        metadata (typeHintedDict):
            Dictionary of key-value metadata pairs. The dictionary tries to retain information about the type of data
            so as to aid import and export from CM group LabVIEW code.
    """

    def __new__(cls, *args, **kargs):
        """Pre initialisation routines.

        The arguments are accepted (and ignored) so that instances can be created with the
        same positional and keyword arguments that ``__init__`` accepts, e.g. ``metadata=``.
        """
        self = super().__new__(cls)
        self._public_attrs_real = {}
        self._metadata = typeHintedDict()
        return self

    def __init__(self, *args: Any, **kargs: Any) -> None:  # pylint: disable=unused-argument
        """Initialise the current metadata attribute.

        Keyword Arguments:
            metadata (mapping):
                Initial metadata entries used to update the metadata dictionary.
        """
        metadata = kargs.pop("metadata", {})
        self._metadata = getattr(self, "_metadata", typeHintedDict())
        self.metadata.update(metadata)
        super().__init__()

    @property
    def _public_attrs(self):
        """Return a dictionary of attributes setable by keyword argument with their types."""
        try:
            return self._public_attrs_real  # pylint: disable=no-member
        except AttributeError:
            self._public_attrs_real = {}  # pylint: disable=attribute-defined-outside-init
            return self._public_attrs_real

    @_public_attrs.setter
    def _public_attrs(self, value):
        """Private property to update (rather than replace) the dictionary of public attributes."""
        self._public_attrs_real.update(dict(value))  # pylint: disable=no-member

    @property
    def metadata(self) -> Dict:
        """Read the metadata dictionary, creating an empty one if there is none yet."""
        try:
            return self._metadata
        except AttributeError:  # Oops no metadata yet
            self._metadata = typeHintedDict()
            return self._metadata

    @metadata.setter
    def metadata(self, value: IterableType) -> None:
        """Update the metadata object with type checking.

        Raises:
            TypeError: if value is neither a typeHintedDict nor iterable.
        """
        if isinstance(value, typeHintedDict):
            self._metadata = value
        elif isiterable(value):
            self._metadata = typeHintedDict(value)
        else:
            raise TypeError(f"metadata must be something that can be turned into a dictionary, not a {value}")

    def __getitem__(self, name: Union[str, RegExp]) -> Any:
        """Pass through to metadata dictionary."""
        return self.metadata[name]

    def __setitem__(self, name: Union[str, RegExp], value: Any) -> None:
        """Pass through to metadata dictionary."""
        self.metadata[name] = value

    def __delitem__(self, name: Union[str, RegExp]) -> None:
        """Pass through to metadata dictionary."""
        del self.metadata[name]

    def __eq__(self, other: Any) -> bool:
        """Implement an equality test for metadataObjects.

        Returns:
            True if other is a metadataObject with the same number of entries and the
            difference (``^``) of the two metadata dictionaries is empty.
        """
        if not isinstance(other, metadataObject):
            return False
        if len(self) != len(other):
            return False
        ret = self.metadata ^ other.metadata
        return len(ret) == 0

    def __len__(self) -> int:
        """Pass through to metadata dictionary."""
        return len(self.metadata)

    def __iter__(self) -> Generator:
        """Pass through to metadata dictionary."""
        return self.metadata.__iter__()

    def keys(self) -> Generator:
        """Yield the keys of the metadata dictionary."""
        yield from self.metadata.keys()

    def items(self) -> Generator:
        """Yield (key, value) pairs from the metadata dictionary rather than iterating over self."""
        yield from self.metadata.items()

    def values(self) -> Generator:
        """Yield the values of the metadata dictionary."""
        yield from self.metadata.values()

    def save(self, filename: Filename = None, **kargs: Any):
        """Stub method for a save function."""
        raise NotImplementedError("Save is not implemented in the base class.")

    @classmethod
    def load(cls, filename: Filename, *args: Any, **kargs: Any) -> "metadataObject":
        """Stub method for a load function."""
        raise NotImplementedError("Load is not implemented in the base class.")
class SortedMultivalueDict(OrderedDict):
"""Implement a simple multivalued dictionary where the values are always sorted lists of elements."""
@classmethod
def _matching(cls, val: Tuple[int, str] | List[Tuple[int, str]]) -> List[Tuple[int, str]]:
match val:
case (int(p), item):
return [(p, item)]
case [(int(p), item), *rest]:
return sorted([(p, item)] + cls._matching(rest))
case []:
return []
case _:
raise TypeError("Can only add items that are a typle of int,value")
def get_value_list(self, name):
"""Get the values stored in the dictionary under name."""
return [item for _, item in self.get(name, [])]
def __setitem__(self, name: Any, val: Union[List[Tuple[int, Any]], Tuple[int, Any]]) -> None:
"""Insert or replace a value and then sort the values."""
values = self._matching(val)
for p, value in values: # pylint: disable=not-an-iterable
for ix, (_, old_value) in enumerate(self.get(name, [])):
if old_value == value: # replacing existing value
self[name][ix] = (p, value)
break
else:
super().__setitem__(name, self.get(name, []) + [(p, value)])
super().__setitem__(name, sorted(self[name], key=lambda item: (item[0], str(item[1]))))
# Only register the accessor when pandas could be imported and DataFrame does not
# already have a ``metadata`` attribute, so that the metadata is not added twice.
if pd is not None and not hasattr(pd.DataFrame, "metadata"):

    @pd.api.extensions.register_dataframe_accessor("metadata")
    class PandasMetadata(typeHintedDict):
        """Add a typeHintedDict to pandas DataFrames as the ``metadata`` accessor."""

        def __init__(self, pandas_obj):
            """Create an empty typeHintedDict and keep a reference to the pandas object."""
            super().__init__()
            self._obj = pandas_obj