Skip to content

Commit

Permalink
Merge pull request #12 from Recidiviz/dan/api-performance
Browse files Browse the repository at this point in the history
Refactor API data-access pattern to only load what is necessary; use prepared statements
  • Loading branch information
ohaibbq committed Apr 17, 2024
2 parents 729f8c1 + 5dc4e1d commit 61539ca
Show file tree
Hide file tree
Showing 12 changed files with 923 additions and 458 deletions.
2 changes: 1 addition & 1 deletion go.mod
Expand Up @@ -89,6 +89,6 @@ require (
google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // indirect
)

replace github.com/goccy/go-zetasqlite => github.com/Recidiviz/go-zetasqlite v0.18.0-recidiviz.7
replace github.com/goccy/go-zetasqlite => github.com/Recidiviz/go-zetasqlite v0.18.0-recidiviz.8

replace github.com/mattn/go-sqlite3 => github.com/Recidiviz/go-sqlite3 v0.0.0-20240220230115-bffb5ad78048
4 changes: 2 additions & 2 deletions go.sum
Expand Up @@ -29,8 +29,8 @@ github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvK
github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk=
github.com/Recidiviz/go-sqlite3 v0.0.0-20240220230115-bffb5ad78048 h1:G8qFbNf/6IWYup4//DcrwsMYvAl80qZk9hEb6Z+UfKc=
github.com/Recidiviz/go-sqlite3 v0.0.0-20240220230115-bffb5ad78048/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/Recidiviz/go-zetasqlite v0.18.0-recidiviz.7 h1:wvjkJOGE9xCk4WtzNedjHOPuudqmqn9yz3Son8SPVRQ=
github.com/Recidiviz/go-zetasqlite v0.18.0-recidiviz.7/go.mod h1:KVfVr9Lp7/4FH0Eeiunu1Dh274lxKJvWwpQWEkoRkuA=
github.com/Recidiviz/go-zetasqlite v0.18.0-recidiviz.8 h1:OFNdqgtpVfUhcU5wIq0Uipxm7eZ+eiolpVGhrEFt1yA=
github.com/Recidiviz/go-zetasqlite v0.18.0-recidiviz.8/go.mod h1:KVfVr9Lp7/4FH0Eeiunu1Dh274lxKJvWwpQWEkoRkuA=
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
Expand Down
5 changes: 2 additions & 3 deletions internal/contentdata/repository.go
Expand Up @@ -5,11 +5,10 @@ import (
"database/sql"
"fmt"
"github.com/goccy/go-zetasqlite"
"reflect"
"strings"

"go.uber.org/zap"
bigqueryv2 "google.golang.org/api/bigquery/v2"
"reflect"
"strings"

"github.com/goccy/bigquery-emulator/internal/connection"
"github.com/goccy/bigquery-emulator/internal/logger"
Expand Down
164 changes: 51 additions & 113 deletions internal/metadata/dataset.go
Expand Up @@ -13,47 +13,11 @@ import (
// ErrDuplicatedTable is the sentinel error wrapped by Dataset.AddTable when a
// table with the same ID is already present in the dataset. Callers can detect
// it with errors.Is.
var ErrDuplicatedTable = errors.New("table is already created")

type Dataset struct {
ID string
ProjectID string
tables []*Table
tableMap map[string]*Table
models []*Model
modelMap map[string]*Model
routines []*Routine
routineMap map[string]*Routine
mu sync.RWMutex
content *bigqueryv2.Dataset
repo *Repository
}

func (d *Dataset) TableIDs() []string {
d.mu.RLock()
defer d.mu.RUnlock()
tableIDs := make([]string, 0, len(d.tables))
for _, table := range d.tables {
tableIDs = append(tableIDs, table.ID)
}
return tableIDs
}

func (d *Dataset) ModelIDs() []string {
d.mu.RLock()
defer d.mu.RUnlock()
modelIDs := make([]string, 0, len(d.models))
for _, model := range d.models {
modelIDs = append(modelIDs, model.ID)
}
return modelIDs
}

func (d *Dataset) RoutineIDs() []string {
d.mu.RLock()
defer d.mu.RUnlock()
routineIDs := make([]string, 0, len(d.routines))
for _, routine := range d.routines {
routineIDs = append(routineIDs, routine.ID)
}
return routineIDs
ID string
ProjectID string
mu sync.RWMutex
content *bigqueryv2.Dataset
repo *Repository
}

func (d *Dataset) Content() *bigqueryv2.Dataset {
Expand Down Expand Up @@ -101,126 +65,100 @@ func (d *Dataset) UpdateContent(newContent *bigqueryv2.Dataset) {
}

// Insert hands this dataset to the repository's AddDataset using tx and
// returns its error. The dataset's exclusive lock is held for the duration of
// the call.
func (d *Dataset) Insert(ctx context.Context, tx *sql.Tx) error {
	d.mu.Lock()
	defer d.mu.Unlock()

	err := d.repo.AddDataset(ctx, tx, d)
	return err
}

// Delete hands this dataset to the repository's DeleteDataset using tx and
// returns its error. The dataset's exclusive lock is held for the duration of
// the call.
func (d *Dataset) Delete(ctx context.Context, tx *sql.Tx) error {
	d.mu.Lock()
	defer d.mu.Unlock()

	err := d.repo.DeleteDataset(ctx, tx, d)
	return err
}

func (d *Dataset) DeleteModel(ctx context.Context, tx *sql.Tx, id string) error {
d.mu.Lock()
defer d.mu.Unlock()
model, exists := d.modelMap[id]
if !exists {
return fmt.Errorf("model '%s' is not found in dataset '%s'", id, d.ID)
}
if err := model.Delete(ctx, tx); err != nil {
model, err := d.repo.FindModel(ctx, d.ProjectID, d.ID, id)
if err != nil {
return err
}
newModels := make([]*Model, 0, len(d.models))
for _, model := range d.models {
if model.ID == id {
continue
}
newModels = append(newModels, model)
if model != nil {
return fmt.Errorf("model '%s' is not found in dataset '%s'", id, d.ID)
}
d.models = newModels
delete(d.modelMap, id)
if err := d.repo.UpdateDataset(ctx, tx, d); err != nil {
if err := model.Delete(ctx, tx); err != nil {
return err
}
return nil
}

func (d *Dataset) AddTable(ctx context.Context, tx *sql.Tx, table *Table) error {
d.mu.Lock()
if _, exists := d.tableMap[table.ID]; exists {
d.mu.Unlock()
exists, err := d.repo.TableExists(ctx, tx, d.ProjectID, d.ID, table.ID)
if err != nil {
return err
}
if exists {
return fmt.Errorf("table %s: %w", table.ID, ErrDuplicatedTable)
}

d.mu.Lock()
defer d.mu.Unlock()
if err := table.Insert(ctx, tx); err != nil {
d.mu.Unlock()
return err
}
d.tables = append(d.tables, table)
d.tableMap[table.ID] = table
d.mu.Unlock()

if err := d.repo.UpdateDataset(ctx, tx, d); err != nil {
return err
}
return nil
}

func (d *Dataset) Table(id string) *Table {
d.mu.RLock()
defer d.mu.RUnlock()
return d.tableMap[id]
func (d *Dataset) Table(ctx context.Context, id string) (*Table, error) {
d.mu.Lock()
defer d.mu.Unlock()
return d.repo.FindTable(ctx, d.ProjectID, d.ID, id)
}

func (d *Dataset) Model(id string) *Model {
d.mu.RLock()
defer d.mu.RUnlock()
return d.modelMap[id]
func (d *Dataset) Model(ctx context.Context, id string) (*Model, error) {
d.mu.Lock()
defer d.mu.Unlock()
return d.repo.FindModel(ctx, d.ProjectID, d.ID, id)
}

func (d *Dataset) Routine(id string) *Routine {
d.mu.RLock()
defer d.mu.RUnlock()
return d.routineMap[id]
func (d *Dataset) Routine(ctx context.Context, id string) (*Routine, error) {
d.mu.Lock()
defer d.mu.Unlock()
return d.repo.FindRoutine(ctx, d.ProjectID, d.ID, id)
}

func (d *Dataset) Tables() []*Table {
d.mu.RLock()
defer d.mu.RUnlock()
return d.tables
func (d *Dataset) Tables(ctx context.Context) ([]*Table, error) {
d.mu.Lock()
defer d.mu.Unlock()
return d.repo.FindTablesInDatasets(ctx, d.ProjectID, d.ID)
}

func (d *Dataset) Models() []*Model {
d.mu.RLock()
defer d.mu.RUnlock()
return d.models
func (d *Dataset) Models(ctx context.Context) ([]*Model, error) {
d.mu.Lock()
defer d.mu.Unlock()
return d.repo.FindModelsInDataset(ctx, d.ProjectID, d.ID)
}

func (d *Dataset) Routines() []*Routine {
d.mu.RLock()
defer d.mu.RUnlock()
return d.routines
func (d *Dataset) Routines(ctx context.Context) ([]*Routine, error) {
d.mu.Lock()
defer d.mu.Unlock()
return d.repo.FindRoutinesInDataset(ctx, d.ProjectID, d.ID)
}

func NewDataset(
repo *Repository,
projectID string,
datasetID string,
content *bigqueryv2.Dataset,
tables []*Table,
models []*Model,
routines []*Routine) *Dataset {

tableMap := map[string]*Table{}
for _, table := range tables {
tableMap[table.ID] = table
}
modelMap := map[string]*Model{}
for _, model := range models {
modelMap[model.ID] = model
}
routineMap := map[string]*Routine{}
for _, routine := range routines {
routineMap[routine.ID] = routine
}

) *Dataset {
return &Dataset{
ID: datasetID,
ProjectID: projectID,
tables: tables,
tableMap: tableMap,
models: models,
modelMap: modelMap,
routines: routines,
routineMap: routineMap,
content: content,
repo: repo,
ID: datasetID,
ProjectID: projectID,
content: content,
repo: repo,
}
}

0 comments on commit 61539ca

Please sign in to comment.