Skip to content

Commit

Permalink
Add Iceberg provider (apache#39155)
Browse files Browse the repository at this point in the history
* add log for running callback

* revert

* add iceberg provider

* add tabular deprecation

* fix comment

* fix comment

* fix redirect

* deprecated tabular, fix integration name

* fix connections.rst

* merge with main

* remove iceberg default host

* add to providers bug report

* fix changelog and revert __init__

* remove redirects, remove tabular new version, revert latest docs only

* remove deprecated hook

* add deprecation warning

* revert tabular connection and add iceberg connection

* fix iceberg tests

* fix iceberg connection test

* fix iceberg connection

* mock the correct connection

* tabular should not have tests

* remove deprecated hook from yaml

* fix integration name

* add iceberg logo

* fix integrations

* fix iceberg logo location

* revert tabular in extra-packages-ref

* fix docs
  • Loading branch information
romsharon98 authored and RodrigoGanancia committed May 10, 2024
1 parent 03c3658 commit 3bace5f
Show file tree
Hide file tree
Showing 47 changed files with 601 additions and 209 deletions.
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ body:
- apache-flink
- apache-hdfs
- apache-hive
- apache-iceberg
- apache-impala
- apache-kafka
- apache-kylin
Expand Down
8 changes: 4 additions & 4 deletions .github/boring-cyborg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -478,10 +478,10 @@ labelPRBasedOnFilePath:
- tests/system/providers/tableau/**/*

provider:tabular:
- airflow/providers/tabular/**/*
- docs/apache-airflow-providers-tabular/**/*
- tests/providers/tabular/**/*
- tests/system/providers/tabular/**/*
- airflow/providers/apache/iceberg/**/*
- docs/apache-airflow-providers-apache-iceberg/**/*
- tests/providers/apache/iceberg/**/*
- tests/system/providers/apache/iceberg/**/*

provider:telegram:
- airflow/providers/telegram/**/*
Expand Down
4 changes: 2 additions & 2 deletions INSTALL
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ or dependencies that are necessary to enable the feature in editable build.
# START PROVIDER EXTRAS HERE

airbyte, alibaba, amazon, apache.beam, apache.cassandra, apache.drill, apache.druid, apache.flink,
apache.hdfs, apache.hive, apache.impala, apache.kafka, apache.kylin, apache.livy, apache.pig,
apache.pinot, apache.spark, apprise, arangodb, asana, atlassian.jira, celery, cloudant,
apache.hdfs, apache.hive, apache.iceberg, apache.impala, apache.kafka, apache.kylin, apache.livy,
apache.pig, apache.pinot, apache.spark, apprise, arangodb, asana, atlassian.jira, celery, cloudant,
cncf.kubernetes, cohere, common.io, common.sql, databricks, datadog, dbt.cloud, dingding, discord,
docker, elasticsearch, exasol, fab, facebook, ftp, github, google, grpc, hashicorp, http, imap,
influxdb, jdbc, jenkins, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo,
Expand Down
32 changes: 32 additions & 0 deletions airflow/providers/apache/iceberg/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
.. NOTE TO CONTRIBUTORS:
Please, only add notes to the Changelog just below the "Changelog" header when there are some breaking changes
and you want to add an explanation to the users on how they are supposed to deal with them.
The changelog is updated and maintained semi-automatically by the release manager.
``apache-airflow-providers-apache-iceberg``


Changelog
---------

1.0.0
.....

Initial version of the provider.
41 changes: 41 additions & 0 deletions airflow/providers/apache/iceberg/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE
# OVERWRITTEN WHEN PREPARING DOCUMENTATION FOR THE PACKAGES.
#
# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE
# `PROVIDER__INIT__PY_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
#
from __future__ import annotations

import importlib.metadata

import packaging.version

__version__ = "1.0.0"

__all__ = ["__version__"]

# Version string of the installed ``apache-airflow`` distribution, read from package metadata.
airflow_version = importlib.metadata.version("apache-airflow")

# Fail at import time when the installed Airflow is older than this provider supports.
# The comparison is made on ``base_version`` of the installed version rather than on
# the full version string.
if not (
    packaging.version.parse(packaging.version.parse(airflow_version).base_version)
    >= packaging.version.parse("2.7.0")
):
    raise RuntimeError(
        f"The package `apache-airflow-providers-apache-iceberg:{__version__}` needs Apache Airflow 2.7.0+"
    )
File renamed without changes.
90 changes: 90 additions & 0 deletions airflow/providers/apache/iceberg/hooks/iceberg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from typing import Any

import requests
from requests import HTTPError

from airflow.hooks.base import BaseHook

TOKENS_ENDPOINT = "oauth/tokens"


class IcebergHook(BaseHook):
    """
    Base hook for Iceberg services.

    It offers the ability to generate temporary, short-lived
    session tokens to use within Airflow submitted jobs.

    :param iceberg_conn_id: The :ref:`Iceberg connection id<howto/connection:iceberg>`
        which refers to the information needed to connect to Iceberg.
    """

    conn_name_attr = "iceberg_conn_id"
    default_conn_name = "iceberg_default"
    conn_type = "iceberg"
    hook_name = "Iceberg"

    # Seconds to wait for the token endpoint. Without a timeout ``requests`` may block
    # indefinitely on an unresponsive server; subclasses can override this value.
    token_request_timeout: float = 60.0

    @classmethod
    def get_ui_field_behaviour(cls) -> dict[str, Any]:
        """Return custom UI field behaviour for Iceberg connection."""
        return {
            "hidden_fields": ["schema", "port"],
            "relabeling": {
                "host": "Base URL",
                "login": "Client ID",
                "password": "Client Secret",
            },
            "placeholders": {
                "login": "client_id (token credentials auth)",
                "password": "secret (token credentials auth)",
            },
        }

    def __init__(self, iceberg_conn_id: str = default_conn_name) -> None:
        super().__init__()
        self.conn_id = iceberg_conn_id

    def test_connection(self) -> tuple[bool, str]:
        """
        Test the Iceberg connection by requesting a token.

        :return: ``(True, message)`` when a token was fetched, otherwise
            ``(False, error_description)``. Errors are reported, never raised.
        """
        try:
            self.get_conn()
            return True, "Successfully fetched token from Iceberg"
        except HTTPError as e:
            return False, f"HTTP Error: {e}: {e.response.text}"
        except Exception as e:
            # Deliberately broad: any failure is turned into a message for the caller.
            return False, str(e)

    def get_conn(self) -> str:
        """
        Obtain a short-lived access token via a client_id and client_secret.

        :return: The ``access_token`` value from the token endpoint's JSON response.
        :raises ValueError: If the connection has no host (Base URL) configured.
        :raises requests.HTTPError: If the token endpoint answers with an error status.
        """
        conn = self.get_connection(self.conn_id)
        if not conn.host:
            # There is no default host to fall back to, so fail with an actionable
            # message instead of an AttributeError on ``None.rstrip``.
            raise ValueError(
                f"Iceberg connection '{self.conn_id}' has no host configured; "
                "set the connection's Base URL (host)."
            )
        # Drop trailing slashes so joining with the endpoint yields exactly one separator.
        base_url = conn.host.rstrip("/")
        data = {
            "client_id": conn.login,
            "client_secret": conn.password,
            "grant_type": "client_credentials",
        }

        response = requests.post(
            f"{base_url}/{TOKENS_ENDPOINT}",
            data=data,
            timeout=self.token_request_timeout,
        )
        response.raise_for_status()

        return response.json()["access_token"]

    def get_token_macro(self) -> str:
        """
        Return a template expression string that resolves to a token for this connection.

        The returned text has the form ``{{ conn.<conn_id>.get_hook().get_conn() }}``;
        presumably it is meant to be rendered by Airflow's templating -- confirm with callers.
        """
        return f"{{{{ conn.{self.conn_id}.get_hook().get_conn() }}}}"
49 changes: 49 additions & 0 deletions airflow/providers/apache/iceberg/provider.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

---
package-name: apache-airflow-providers-apache-iceberg
name: Iceberg
description: |
`Iceberg <https://iceberg.apache.org/>`__
state: ready
source-date-epoch: 1705912293
# note that these versions are maintained by the release manager - do not update them manually
versions:
- 1.0.0

dependencies:
- apache-airflow>=2.7.0

devel-dependencies:
- pyiceberg>=0.5.0

integrations:
- integration-name: Iceberg
logo: /integration-logos/iceberg/iceberg.png
external-doc-url: https://iceberg.apache.org/
tags: [software]

hooks:
- integration-name: Iceberg
python-modules:
- airflow.providers.apache.iceberg.hooks.iceberg

connection-types:
- hook-class-name: airflow.providers.apache.iceberg.hooks.iceberg.IcebergHook
connection-type: iceberg
25 changes: 0 additions & 25 deletions airflow/providers/tabular/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,3 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE
# OVERWRITTEN WHEN PREPARING DOCUMENTATION FOR THE PACKAGES.
#
# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE
# `PROVIDER__INIT__PY_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
#
from __future__ import annotations

import importlib.metadata

import packaging.version

__version__ = "1.5.0"

__all__ = ["__version__"]

# Version string of the installed ``apache-airflow`` distribution, read from package metadata.
airflow_version = importlib.metadata.version("apache-airflow")

# Fail at import time when the installed Airflow is older than this provider supports.
# The comparison is made on ``base_version`` of the installed version rather than on
# the full version string.
if not (
    packaging.version.parse(packaging.version.parse(airflow_version).base_version)
    >= packaging.version.parse("2.7.0")
):
    raise RuntimeError(
        f"The package `apache-airflow-providers-tabular:{__version__}` needs Apache Airflow 2.7.0+"
    )
1 change: 0 additions & 1 deletion airflow/providers/tabular/hooks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
Expand Down
82 changes: 10 additions & 72 deletions airflow/providers/tabular/hooks/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,80 +14,18 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from typing import Any

import requests
from requests import HTTPError

from airflow.hooks.base import BaseHook

DEFAULT_TABULAR_URL = "https://api.tabulardata.io/ws/v1"

TOKENS_ENDPOINT = "oauth/tokens"


class TabularHook(BaseHook):
"""
This hook acts as a base hook for tabular services.
"""This module is deprecated. Please use :mod:`airflow.providers.apache.iceberg.hooks.iceberg`."""

It offers the ability to generate temporary, short-lived
session tokens to use within Airflow submitted jobs.
:param tabular_conn_id: The :ref:`Tabular connection id<howto/connection:tabular>`
which refers to the information to connect to the Tabular OAuth service.
"""

conn_name_attr = "tabular_conn_id"
default_conn_name = "tabular_default"
conn_type = "tabular"
hook_name = "Tabular"

@classmethod
def get_ui_field_behaviour(cls) -> dict[str, Any]:
"""Return custom UI field behaviour for Tabular connection."""
return {
"hidden_fields": ["schema", "port"],
"relabeling": {
"host": "Base URL",
"login": "Client ID",
"password": "Client Secret",
},
"placeholders": {
"host": DEFAULT_TABULAR_URL,
"login": "client_id (token credentials auth)",
"password": "secret (token credentials auth)",
},
}

def __init__(self, tabular_conn_id: str = default_conn_name) -> None:
super().__init__()
self.conn_id = tabular_conn_id

def test_connection(self) -> tuple[bool, str]:
"""Test the Tabular connection."""
try:
self.get_conn()
return True, "Successfully fetched token from Tabular"
except HTTPError as e:
return False, f"HTTP Error: {e}: {e.response.text}"
except Exception as e:
return False, str(e)

def get_conn(self) -> str:
"""Obtain a short-lived access token via a client_id and client_secret."""
conn = self.get_connection(self.conn_id)
base_url = conn.host if conn.host else DEFAULT_TABULAR_URL
base_url = base_url.rstrip("/")
client_id = conn.login
client_secret = conn.password
data = {"client_id": client_id, "client_secret": client_secret, "grant_type": "client_credentials"}
from __future__ import annotations

response = requests.post(f"{base_url}/{TOKENS_ENDPOINT}", data=data)
response.raise_for_status()
import warnings

return response.json()["access_token"]
from airflow.exceptions import AirflowProviderDeprecationWarning
from airflow.providers.apache.iceberg.hooks.iceberg import IcebergHook # noqa: F401

def get_token_macro(self):
return f"{{{{ conn.{self.conn_id}.get_hook().get_conn() }}}}"
warnings.warn(
"This module is deprecated. Please use `airflow.providers.apache.iceberg.hooks.iceberg`.",
AirflowProviderDeprecationWarning,
stacklevel=2,
)

0 comments on commit 3bace5f

Please sign in to comment.