From 7b2d2b29e4bdd96b61f8523e0eb8393122712775 Mon Sep 17 00:00:00 2001 From: Hugo Bollon Date: Fri, 25 Jun 2021 10:34:07 +0200 Subject: [PATCH] feat: parallelized db refreshing and add mutex security + code review fixes from #177 --- README.md | 19 ++-- db/db.go | 11 ++- main.go | 96 +++++++++---------- test/{ => multiple-minio-buckets}/Makefile | 6 ++ test/{ => multiple-minio-buckets}/config.yml | 0 .../multiple-minio-buckets/docker-compose.yml | 64 +++++++++++++ test/single-minio-bucket/Makefile | 11 +++ .../docker-compose.yml | 27 +----- 8 files changed, 151 insertions(+), 83 deletions(-) rename test/{ => multiple-minio-buckets}/Makefile (55%) rename test/{ => multiple-minio-buckets}/config.yml (100%) create mode 100644 test/multiple-minio-buckets/docker-compose.yml create mode 100644 test/single-minio-bucket/Makefile rename test/{ => single-minio-bucket}/docker-compose.yml (70%) diff --git a/README.md b/README.md index 956032ac..1f3e979d 100644 --- a/README.md +++ b/README.md @@ -150,19 +150,23 @@ Terraboard currently supports configuration in three different ways: 2. CLI parameters **(only usable for mono provider configuration)** 3. Configuration file (YAML). A configuration file example can be found in the root directory of this repository and in the `test/` subdirectory. -**Important: all flags/environment variables related to the providers settings aren't compatible with multi-provider configuration! For that, you must use the Yaml config file to be able to configure multiples buckets/providers.** +**Important: all flags/environment variables related to the providers settings aren't compatible with multi-provider configuration! Instead, you must use the YAML config file to be able to configure multiple buckets/providers.** The precedence of configurations is as described below. ### Multiple buckets/providers -In order to be able to link to Terraboard multiples buckets or even providers, you must use the Yaml configuration method. 
+In order for Terraboard to import states from multiple buckets or even providers, you must use the YAML configuration method: -- Set the environment variable **CONFIG_FILE** or the flag **-c** / **--config-file** to a valid Yaml config file. - -- In the Yaml file, specify your desired providers configuration. For example with two S3 buckets: +- Set the `CONFIG_FILE` environment variable or the `-c`/`--config-file` flag to point to a valid YAML config file. +- In the YAML file, specify your desired providers configuration. For example with two MinIO buckets (using the AWS provider with compatible mode): ```yaml +# Needed since MinIO doesn't support versioning or locking +provider: + no-locks: true + no-versioning: true + aws: - endpoint: http://minio:9000/ region: eu-west-1 @@ -179,13 +183,12 @@ aws: force-path-style: true file-extension: - .tfstate - ``` -In the case of AWS, don't forget to set **AWS_ACCESS_KEY_ID** and **AWS_SECRET_ACCESS_KEY** environment variables. +In the case of AWS, don't forget to set the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables. That's it! Terraboard will now fetch these two buckets on DB refresh. You can also mix providers like AWS and Gitlab or anything else. -You can find a ready-to-use docker example with two *MinIO* buckets in the `test/` sub-folder (just swipe the two terraboard services in the docker-compose file). +You can find a ready-to-use Docker example with two *MinIO* buckets in the `test/multiple-minio-buckets/` sub-folder. 
### Available parameters diff --git a/db/db.go b/db/db.go index 0c2f86f3..2262d506 100644 --- a/db/db.go +++ b/db/db.go @@ -7,6 +7,7 @@ import ( "net/url" "strconv" "strings" + "sync" "github.com/camptocamp/terraboard/config" "github.com/camptocamp/terraboard/state" @@ -25,6 +26,7 @@ import ( // Database is a wrapping structure to *gorm.DB type Database struct { *gorm.DB + lock sync.Mutex } var pageSize = 20 @@ -79,7 +81,7 @@ func Init(config config.DBConfig, debug bool) *Database { db.Config.Logger.LogMode(logger.Info) } - d := &Database{db} + d := &Database{DB: db} if err = d.MigrateLineage(); err != nil { log.Fatalf("Lineage migration failed: %v\n", err) } @@ -124,11 +126,14 @@ func (db *Database) stateS3toDB(sf *statefile.File, path string, versionID strin // Check if the associated lineage is already present in lineages table // If so, it recovers its ID otherwise it inserts it at the same time as the state var lineage types.Lineage + db.lock.Lock() err = db.FirstOrCreate(&lineage, types.Lineage{Value: sf.Lineage}).Error + db.lock.Unlock() /* release before the error check: the early return below must not leave the mutex held */ if err != nil || lineage.ID == 0 { - log.Error("Unknown error in stateS3toDB during lineage finding") + log.WithField("error", err). 
+ Error("Unknown error in stateS3toDB during lineage finding") return types.State{}, err } st = types.State{ Path: path, @@ -242,10 +247,12 @@ func (db *Database) UpdateState(st types.State) error { // InsertVersion inserts an AWS S3 Version in the Database func (db *Database) InsertVersion(version *state.Version) error { var v types.Version + db.lock.Lock() db.FirstOrCreate(&v, types.Version{ VersionID: version.ID, LastModified: version.LastModified, }) + db.lock.Unlock() return nil } diff --git a/main.go b/main.go index 245f21bc..1e96b2fc 100644 --- a/main.go +++ b/main.go @@ -65,60 +65,55 @@ func isKnownStateVersion(statesVersions map[string][]string, versionID, path str // Refresh the DB // This should be the only direct bridge between the state providers and the DB -func refreshDB(syncInterval uint16, d *db.Database, sps []state.Provider) { +func refreshDB(syncInterval uint16, d *db.Database, sp state.Provider) { interval := time.Duration(syncInterval) * time.Minute - log.Debugf("Providers: %+v\n", sps) for { log.Infof("Refreshing DB") - log.Debugf("Total providers: %d\n", len(sps)) - for i, sp := range sps { - log.Debugf("Fetching provider %d/%d\n", i+1, len(sps)) - states, err := sp.GetStates() - if err != nil { - log.WithFields(log.Fields{ - "error": err, - }).Error("Failed to retrieve states. Retrying in 1 minute.") - time.Sleep(interval) - continue - } + states, err := sp.GetStates() + if err != nil { + log.WithFields(log.Fields{ + "error": err, + }).Error("Failed to retrieve states. 
Retrying in 1 minute.") + time.Sleep(interval) + continue + } - statesVersions := d.ListStatesVersions() - for _, st := range states { - versions, _ := sp.GetVersions(st) - for k, v := range versions { - if _, ok := statesVersions[v.ID]; ok { - log.WithFields(log.Fields{ - "version_id": v.ID, - }).Debug("Version is already in the database, skipping") - } else { - if err := d.InsertVersion(&versions[k]); err != nil { - log.Error(err.Error()) - } + statesVersions := d.ListStatesVersions() + for _, st := range states { + versions, _ := sp.GetVersions(st) + for k, v := range versions { + if _, ok := statesVersions[v.ID]; ok { + log.WithFields(log.Fields{ + "version_id": v.ID, + }).Debug("Version is already in the database, skipping") + } else { + if err := d.InsertVersion(&versions[k]); err != nil { + log.Error(err.Error()) } + } - if isKnownStateVersion(statesVersions, v.ID, st) { - log.WithFields(log.Fields{ - "path": st, - "version_id": v.ID, - }).Debug("State is already in the database, skipping") - continue - } - state, err := sp.GetState(st, v.ID) - if err != nil { - log.WithFields(log.Fields{ - "path": st, - "version_id": v.ID, - "error": err, - }).Error("Failed to fetch state from bucket") - continue - } - if err = d.InsertState(st, v.ID, state); err != nil { - log.WithFields(log.Fields{ - "path": st, - "version_id": v.ID, - "error": err, - }).Error("Failed to insert state in the database") - } + if isKnownStateVersion(statesVersions, v.ID, st) { + log.WithFields(log.Fields{ + "path": st, + "version_id": v.ID, + }).Debug("State is already in the database, skipping") + continue + } + state, err := sp.GetState(st, v.ID) + if err != nil { + log.WithFields(log.Fields{ + "path": st, + "version_id": v.ID, + "error": err, + }).Error("Failed to fetch state from bucket") + continue + } + if err = d.InsertState(st, v.ID, state); err != nil { + log.WithFields(log.Fields{ + "path": st, + "version_id": v.ID, + "error": err, + }).Error("Failed to insert state in the 
database") } } } @@ -173,7 +168,10 @@ func main() { if c.DB.NoSync { log.Infof("Not syncing database, as requested.") } else { - go refreshDB(c.DB.SyncInterval, database, sps) + log.Debugf("Total providers: %d\n", len(sps)) + for _, sp := range sps { + go refreshDB(c.DB.SyncInterval, database, sp) + } } defer database.Close() diff --git a/test/Makefile b/test/multiple-minio-buckets/Makefile similarity index 55% rename from test/Makefile rename to test/multiple-minio-buckets/Makefile index 56c02ef3..3edb92d0 100644 --- a/test/Makefile +++ b/test/multiple-minio-buckets/Makefile @@ -1,5 +1,11 @@ + UID=$(shell id -u) GID=$(shell id -g) +build: + UID="${UID}" GID="${GID}" docker-compose build + test: UID="${UID}" GID="${GID}" docker-compose up -d + +all: build test \ No newline at end of file diff --git a/test/config.yml b/test/multiple-minio-buckets/config.yml similarity index 100% rename from test/config.yml rename to test/multiple-minio-buckets/config.yml diff --git a/test/multiple-minio-buckets/docker-compose.yml b/test/multiple-minio-buckets/docker-compose.yml new file mode 100644 index 00000000..51b4d2e3 --- /dev/null +++ b/test/multiple-minio-buckets/docker-compose.yml @@ -0,0 +1,64 @@ +--- +version: "3.8" +services: + terraboard-dev: + build: + context: ../../ + dockerfile: ./Dockerfile + environment: + AWS_ACCESS_KEY_ID: root + AWS_SECRET_ACCESS_KEY: mypassword + DB_SSLMODE: disable + CONFIG_FILE: config/config.yml + GODEBUG: netdns=go + depends_on: + db: + condition: service_healthy + minio: + condition: service_started + volumes: + - ../../static:/static:ro + - ./config.yml:/config/config.yml:ro + ports: + - "8080:8080" + + minio: + image: minio/minio:latest + environment: + MINIO_ROOT_USER: root + MINIO_ROOT_PASSWORD: mypassword + user: "${UID}:${GID}" + expose: + - "9000" + ports: + - "9200:9000" + volumes: + - ../data:/data + command: server /data + + db: + image: postgres:9.5 + environment: + POSTGRES_USER: gorm + POSTGRES_PASSWORD: mypassword + 
POSTGRES_DB: gorm + volumes: + - tb-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 10s + timeout: 5s + retries: 5 + + pgadmin: + container_name: pgadmin4_container + image: dpage/pgadmin4 + restart: always + environment: + PGADMIN_DEFAULT_EMAIL: admin@admin.com + PGADMIN_DEFAULT_PASSWORD: root + ports: + - "5050:80" + +volumes: + tb-data: {} diff --git a/test/single-minio-bucket/Makefile b/test/single-minio-bucket/Makefile new file mode 100644 index 00000000..3edb92d0 --- /dev/null +++ b/test/single-minio-bucket/Makefile @@ -0,0 +1,11 @@ + +UID=$(shell id -u) +GID=$(shell id -g) + +build: + UID="${UID}" GID="${GID}" docker-compose build + +test: + UID="${UID}" GID="${GID}" docker-compose up -d + +all: build test \ No newline at end of file diff --git a/test/docker-compose.yml b/test/single-minio-bucket/docker-compose.yml similarity index 70% rename from test/docker-compose.yml rename to test/single-minio-bucket/docker-compose.yml index 37c59d00..f8785745 100755 --- a/test/docker-compose.yml +++ b/test/single-minio-bucket/docker-compose.yml @@ -3,7 +3,7 @@ version: "3.8" services: terraboard-dev: build: - context: ../ + context: ../../ dockerfile: ./Dockerfile environment: AWS_ACCESS_KEY_ID: root @@ -24,31 +24,10 @@ services: minio: condition: service_started volumes: - - ../static:/static:ro + - ../../static:/static:ro ports: - "8080:8080" - # terraboard-dev-multi-providers: - # build: - # context: ../ - # dockerfile: ./Dockerfile - # environment: - # AWS_ACCESS_KEY_ID: root - # AWS_SECRET_ACCESS_KEY: mypassword - # DB_SSLMODE: disable - # CONFIG_FILE: config/config.yml - # GODEBUG: netdns=go - # depends_on: - # db: - # condition: service_healthy - # minio: - # condition: service_started - # volumes: - # - ../static:/static:ro - # - ./config.yml:/config/config.yml:ro - # ports: - # - "8081:8080" - minio: image: minio/minio:latest environment: @@ -60,7 +39,7 @@ services: ports: - "9200:9000" volumes: - - 
./data:/data + - ../data:/data command: server /data db: