Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[INLONG-7072][Manager][Sort] Resource adaptive adjustment for Hudi #7077

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,8 @@ inlong-sdk/dataproxy-sdk-twins/dataproxy-sdk-cpp/release/bin/

# Docker mysql storage
docker/docker-compose/mysql

/inlong-sort/connectors
/plugins
/inlong-sort/sort-dist.jar
/inlong-manager/plugins
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please remove these lines

4 changes: 4 additions & 0 deletions inlong-dashboard/src/locales/cn.json
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@
"meta.Sinks.Hudi.PartitionFieldListHelp": "字段类型若为timestamp,则必须设置此字段值的格式,支持 MICROSECONDS,MILLISECONDS,SECONDS,SQL,ISO_8601,以及自定义,比如:yyyy-MM-dd HH:mm:ss 等",
"meta.Sinks.Hudi.FieldFormat": "字段格式",
"meta.Sinks.Hudi.ExtListHelper": "hudi表的DDL属性需带前缀'ddl.'",
"meta.Sinks.Hudi.CapacityPerRowUnit": "KB",
"meta.Sinks.Hudi.CapacityPerRow": "每条大小",
"meta.Sinks.Hudi.RecordPreDay": "天数据量",
"meta.Sinks.Hudi.RecordPreDayUnit": "条",
"meta.Sinks.Greenplum.TableName": "表名称",
"meta.Sinks.Greenplum.PrimaryKey": "主键",
"meta.Sinks.Greenplum.FieldName": "字段名",
Expand Down
4 changes: 4 additions & 0 deletions inlong-dashboard/src/locales/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@
"meta.Sinks.Hudi.PartitionFieldListHelp": "If the field type is timestamp, you must set the format of the field value, support MICROSECONDS, MILLISECONDS, SECONDS, SQL, ISO_8601, and custom, such as: yyyy-MM-dd HH:mm:ss, etc.",
"meta.Sinks.Hudi.FieldFormat": "FieldFormat",
"meta.Sinks.Hudi.ExtListHelper": "The DDL attribute of the hudi table needs to be prefixed with 'ddl.'",
"meta.Sinks.Hudi.RecordPreDayUnit": "row",
"meta.Sinks.Hudi.RecordPreDay": "RecordPreDay",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RecordPerDay, not PreDay.

"meta.Sinks.Hudi.CapacityPerRowUnit": "KB",
"meta.Sinks.Hudi.CapacityPerRow": "CapacityPerRow",
"meta.Sinks.Greenplum.TableName": "TableName",
"meta.Sinks.Greenplum.PrimaryKey": "PrimaryKey",
"meta.Sinks.Greenplum.FieldName": "FieldName",
Expand Down
26 changes: 26 additions & 0 deletions inlong-dashboard/src/metas/sinks/defaults/Hudi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,32 @@ export default class HudiSink extends SinkInfo implements DataWithBackend, Rende
@ColumnDecorator()
@I18n('meta.Sinks.Hudi.PrimaryKey')
primaryKey: string;

@FieldDecorator({
type: 'inputnumber',
initialValue: 100000,
props: values => ({
disabled: [110, 130].includes(values?.status),
min: 1,
}),
rules: [{ required: true }],
suffix: i18n.t('meta.Sinks.Hudi.RecordPreDayUnit'),
})
@I18n('meta.Sinks.Hudi.RecordPreDay')
recordPreDay: number;

@FieldDecorator({
type: 'inputnumber',
initialValue: 10,
props: values => ({
disabled: [110, 130].includes(values?.status),
min: 1,
}),
rules: [{ required: true }],
suffix: i18n.t('meta.Sinks.Hudi.CapacityPerRowUnit'),
})
@I18n('meta.Sinks.Hudi.CapacityPerRow')
capacityPerRow: number;
}

const getFieldListColumns = sinkValues => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public class IntegrationTaskRunner implements Runnable {
private final FlinkInfo flinkInfo;
private final Integer commitType;

// TODO: add job resource required
public IntegrationTaskRunner(FlinkService flinkService, FlinkInfo flinkInfo, Integer commitType) {
this.flinkService = flinkService;
this.flinkInfo = flinkInfo;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ public Map<String, String> tableOptions() {
options.put(HUDI_OPTION_RECORD_KEY_FIELD_NAME, primaryKey);
options.put("connector", "hudi-inlong");

// TODO adoptive of hudi operator partitions
return options;
}

Expand Down
138 changes: 138 additions & 0 deletions inlong-tools/dev/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
<!--

Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.

-->

## Apache InLong dev toolkit
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please do not commit unrelated codes with this issue.


### Overview

The Apache InLong project includes modules such as "Dashboard", "DataProxy", "Manager" and "TubeMQ".
When debugging in a development environment such as `Intellij IDEA`,
The environment is difficult to build.

Take the `inlong-manager` as an example, it depends on too many configurations and packages.
When debugging locally in the IDE, multiple default directories need to be created, which is more complicated.
So a script is urgently needed to quickly support local debugging.

Also, similar tools are required for local debugging of other modules.
Temporarily named `InLong dev toolkit`, looking forward to adding more features.

### Use case

#### Help

```shell
❯ inlong-tools/dev/inlong-dev-toolkit.sh
####################################################################################
# Welcome to use the Apache InLong dev toolkit! #
# @2022-12-23 20:38:34 #
####################################################################################


inlong-tools/dev/inlong-dev-toolkit.sh help | h
:help

inlong-tools/dev/inlong-dev-toolkit.sh manager | m
:build manager local debugging environment

Have a nice day, bye!

```

#### Manager

Rely of Manager:

| Directory/File | link target | Rely modules |
|--------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|---------------------------------|
| `./plugins` | `inlong-manager/manger-plugins/target/plugins` | `inlong-sort/sort-plugin` |
| `./inlong-sort/connectors` | - | `inlong-sort/sort-connectors/*` | |
| `./inlong-sort/connectors/sort-connector-${name}-${project.version}.jar` | `inlong-sort/sort-connectors/${connector.name}/target/sort-connector-${connector.name}-${project.version}.jar` | `inlong-sort/sort-connectors/*` |
| `./sort-dist.jar` | `sort-dist/target/sort-dist-${project.version}.jar` | `inlong-sort/sort-dist` |

```shell
❯ inlong-tools/dev/inlong-dev-toolkit.sh m
Execute action: manager
# start build manager local debugging environment ...
current_version: 1.5.0-SNAPSHOT
associate plugins directory: inlong-manager/manager-plugins/target/plugins
associate sort dist: inlong-sort/sort-dist/target/sort-dist
recreate connector dir: inlong-sort/connectors
All connector names:
hive mysql-cdc kafka jdbc pulsar iceberg postgres-cdc mongodb-cdc sqlserver-cdc oracle-cdc elasticsearch-6 elasticsearch-7 redis tubemq filesystem doris starrocks hudi
associate connector: hive
associate connector: mysql-cdc
associate connector: kafka
associate connector: jdbc
associate connector: pulsar
associate connector: iceberg
associate connector: postgres-cdc
associate connector: mongodb-cdc
associate connector: sqlserver-cdc
associate connector: oracle-cdc
associate connector: elasticsearch-6
associate connector: elasticsearch-7
associate connector: redis
associate connector: tubemq
associate connector: filesystem
associate connector: doris
associate connector: starrocks
associate connector: hudi
build dev env of manager finished.
Have a nice day, bye!
```

### Development: add more function

#### Public variables

| inner variable | implication |
|--------------------|-----------------------------------|
| `script_dir` | the directory of this script file |
| `script_file_name` | the script file name |
| `base_dir` | the root directory of project |

#### Step 1. implement function

#### Step 2. register function

Create a global array variable:

- It is recommended to end with `_action`, such as `manager_action`.
- It must have 4 elements.
- The 1st element is the long opt command.
- The 2nd element is the sort opt command.
- The 3rd element is the details of the function.
- The 4th element is the function name, which implements in Step 1.

Add the variable to the `actions` array, like:

```shell
actions=(
help_action
manger_action
)
```

#### Step 3. ignore the temporary file

Add temporary files to `.gitignore`

> Notice: must base on project base directory
143 changes: 143 additions & 0 deletions inlong-tools/dev/inlong-dev-toolkit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Initialize the configuration files of inlong components

script_dir=$(dirname "$0")
script_file_name=$(basename "$0")

# the absolute dir of project
base_dir=$(
cd $script_dir
cd ../..
pwd
)

help_action=(
'help'
'h'
'help: show all actions'
'welcome'
)

manager_action=(
'manager'
'm'
'build manager local debugging environment'
'manager'
)

actions=(
help_action
manager_action
)

function welcome() {
local_date_time=$(date +"%Y-%m-%d %H:%M:%S")
echo '####################################################################################'
echo '# Welcome to use Apache InLong dev toolkit ! #'
echo '# @'$local_date_time' #'
echo '####################################################################################'
echo ''

# shellcheck disable=SC2068
for action in ${actions[@]}; do
# shellcheck disable=SC1087
TMP=$action[@]
TempB=("${!TMP}")
name=${TempB[0]}
simple_name=${TempB[1]}
desc=${TempB[2]}

echo $script_dir'/'$script_file_name' '$name' | '$simple_name
echo ' :'$desc
done
echo ''
}

function manager() {
echo '# start build manager local debugging environment ...'

project_version=$(mvn -q \
-Dexec.executable=echo \
-Dexec.args='${project.version}' \
--non-recursive \
exec:exec)

echo 'current_version: '"$project_version"
#
echo 'associate plugins directory: inlong-manager/manager-plugins/target/plugins'
# plugins -> manager-plugins/target/plugins
cd "$base_dir"
rm -rf plugins
ln -s inlong-manager/manager-plugins/target/plugins plugins
#
echo 'associate sort dist: inlong-sort/sort-dist/target/sort-dist'
cd "$base_dir"/inlong-sort
rm -rf sort-dist.jar
ln -s sort-dist/target/sort-dist-"$project_version".jar sort-dist.jar
# inlong-manager: plugins -> manager-plugins/target/plugins
cd "$base_dir"/inlong-manager
rm -rf plugins
ln -s manager-plugins/target/plugins plugins
# mkdir inlong-sort/connectors
sort_connector_dir=$base_dir/inlong-sort/connectors
echo 'recreate connector dir: '"$sort_connector_dir"
# shellcheck disable=SC2086
rm -rf $sort_connector_dir
mkdir "$sort_connector_dir"
cd "$sort_connector_dir"
connector_names=$(grep '<module>' "$base_dir"/inlong-sort/sort-connectors/pom.xml | sed 's/<module>//g' | sed 's/<\/module>//g' | grep -v base)

echo 'All connector names: '
echo $connector_names | tr -d '\n'

for connector_name in $connector_names; do
echo 'associate connector: '"$connector_name"
connector_suffix_name=$(echo "$connector_name" | sed 's/elasticsearch-6/elasticsearch6/g' | sed 's/elasticsearch-7/elasticsearch7/g')
ln -s ../sort-connectors/"${connector_name}"/target/sort-connector-"${connector_suffix_name}"-"$project_version".jar sort-connector-"${connector_name}".jar
done

echo 'build dev env of manager finished .'
}

function main() {
action=$1

if [ ! -n "$action" ]; then
welcome
fi

# shellcheck disable=SC2068
for one_action in ${actions[@]}; do
# shellcheck disable=SC1087
TMP=$one_action[@]
TempB=("${!TMP}")
name=${TempB[0]}
simple_name=${TempB[1]}
function_name=${TempB[3]}
desc=${TempB[4]}

if [[ X"$name" == X"$action" ]] || [[ X"$simple_name" == X"$action" ]]; then
echo 'Execute action: '"$function_name"
$function_name
fi
done

echo 'Have a nice day, bye!'
}

main $1