Skip to content

Commit

Permalink
For Excel Reader, support defining a default_glossary. When a cell contains only `termName`
Browse files Browse the repository at this point in the history

it will add `@<default_glossary>`.

Support manually overriding the default_glossary term by specifying `term@OtherGlossary`
in the cell value.

Closes #264
  • Loading branch information
wjohnson committed Dec 22, 2023
1 parent f5fa20c commit 971d009
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 4 deletions.
5 changes: 5 additions & 0 deletions pyapacheatlas/readers/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ class ExcelConfiguration(ReaderConfiguration):
:param str process_prefix:
Defaults to "Process" and represents the prefix of the columns
in Excel to be considered related to the table process.
:param str default_glossary:
Defaults to "Glossary" and identifies what the glossary suffix
should be for the `[Relationship] meanings` column if not
provided. Can be manually overridden in Excel with `term@OtherGlossary`
as the cell value.
:param str column_transformation_name:
Defaults to "transformation" and identifies the column that
represents the transformation for a specific column.
Expand Down
18 changes: 14 additions & 4 deletions pyapacheatlas/readers/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,14 @@

class ReaderConfiguration():
"""
A base configuration for the Reader class. Allows you to customize
headers with a source_prefix, target_prefix, and process_prefix for
parsing table and column lineages.
A base configuration for the Reader class. Allows you to customize:
* value_separator: Defaults to `;`. Controls the separator for lists of objects.
* source_prefix: Defaults to `Source`. Controls the field name for Source in lineage parsing.
* target_prefix: Defaults to `Target`. Controls the field name for Target in lineage parsing.
* process_prefix: Defaults to `Process`. Controls the field name for Process in lineage parsing.
* column_transformation_name: Defaults to `transformation`. Controls the field name indicating what the transformation is (deprecated).
* default_glossary: Defaults to `Glossary`. When you specify a term, `@<Value>` is appended unless the user provided an `@` in the term.
"""

def __init__(self, **kwargs):
Expand All @@ -33,6 +38,7 @@ def __init__(self, **kwargs):
"process_prefix", "Process")
self.column_transformation_name = kwargs.get(
"column_transformation_name", "transformation")
self.default_glossary = kwargs.get("default_glossary", "Glossary")


class Reader(LineageMixIn):
Expand Down Expand Up @@ -159,7 +165,11 @@ def _organize_attributes(self, row, existing_entities, ignore=[]):
reference_object = [
{"typeName": "AtlasGlossaryTerm",
"uniqueAttributes": {
"qualifiedName": "{}@Glossary".format(t)
# Append the default glossary to the term,
# or let the user manually specify the glossary.
# This might break others' usage of `@` in their
# glossary terms, though.
"qualifiedName": f"{t}@{self.config.default_glossary}" if "@" not in t else t
}
} for t in terms
]
Expand Down

0 comments on commit 971d009

Please sign in to comment.