Skip to content

Commit

Permalink
The Merge: add scripts to merge and delete groups (#2527)
Browse files Browse the repository at this point in the history
* WIP (the script works! just not tested on prod)

* don't notify or update group embeddings during The Merge

* Make merge groups script more robust

* Add script to merge all dupe names

* Add delete group script

* Merge dupes by name_fts

* Fix groupLink format

* fix

* Revert "don't notify or update group embeddings during The Merge"

This reverts commit 1e00129.
  • Loading branch information
sipec committed Mar 12, 2024
1 parent 9140e4a commit 0195e9e
Show file tree
Hide file tree
Showing 4 changed files with 264 additions and 3 deletions.
49 changes: 49 additions & 0 deletions backend/scripts/delete-group.ts
@@ -0,0 +1,49 @@
import { updateGroupLinksOnContracts } from 'merge-groups'
import { runScript } from 'run-script'
import { SupabaseDirectClient } from 'shared/supabase/init'

/**
 * Deletes the group with the given slug together with the rows that point at it.
 *
 * In order: clears `group_id` on `old_posts` rows that reference the group,
 * removes its `group_contracts` rows (then refreshes the group links on the
 * contracts that were tagged), removes its `group_members` rows, and finally
 * deletes the `groups` row.
 *
 * @param pg Postgres client that runs every statement.
 * @param firestore Passed through to `updateGroupLinksOnContracts`.
 * @param slug Slug used to look up the id of the group to delete.
 */
async function deleteGroup(
  pg: SupabaseDirectClient,
  firestore: any,
  slug: string
) {
  const targetGroupId = await pg.one(
    'select id from groups where slug = $1',
    [slug],
    (found) => found.id
  )

  console.log('removing group from posts')
  await pg.none('update old_posts set group_id = null where group_id = $1', [
    targetGroupId,
  ])

  // Collect the tagged contracts before their rows are deleted, so their
  // group links can be refreshed afterwards.
  const taggedContractIds = await pg.map(
    'select contract_id from group_contracts where group_id = $1',
    [targetGroupId],
    (tag) => tag.contract_id
  )

  if (taggedContractIds.length !== 0) {
    console.log('removing group from contracts')
    await pg.none('delete from group_contracts where group_id = $1', [
      targetGroupId,
    ])

    console.log('correcting contract group slugs')
    await updateGroupLinksOnContracts(pg, firestore, taggedContractIds)
  }

  console.log('removing group members')
  await pg.none('delete from group_members where group_id = $1', [
    targetGroupId,
  ])

  console.log('deleting group')
  await pg.none('delete from groups where id = $1', [targetGroupId])
}

// Command-line entry point: runs only when this file is executed directly.
if (require.main === module) {
  const hasSlugArg = process.argv.length >= 3
  if (!hasSlugArg) {
    console.error('usage: delete-group.ts <group slug>')
    process.exit(1)
  }

  runScript(async (ctx) => {
    const { pg, firestore } = ctx
    // The group slug is the first argument after the script path.
    await deleteGroup(pg, firestore, process.argv[2])
  })
}
62 changes: 62 additions & 0 deletions backend/scripts/merge-all-dupe-names.ts
@@ -0,0 +1,62 @@
import { mergeGroups } from 'merge-groups'
import { runScript } from 'run-script'
import { SupabaseDirectClient } from 'shared/supabase/init'

/**
 * Merges public groups that share a `name_fts` value.
 *
 * The query ranks the groups of each `name_fts` by `importance_score` and then
 * `total_members` (both descending). The rank-1 group of each `name_fts` is
 * kept and every other group with the same `name_fts` is merged into it via
 * `mergeGroups`, one merge at a time.
 *
 * NOTE(review): the `name not in (...)` exclusion list only applies inside the
 * subquery that picks duplicated `name_fts` values. A listed name can still be
 * returned by the outer query (and merged) when other, unlisted groups share
 * its `name_fts` — confirm whether that is intended.
 *
 * @param pg Postgres client used for the query and forwarded to `mergeGroups`.
 * @param firestore Forwarded to `mergeGroups`.
 */
const mergeAllDupes = async (pg: SupabaseDirectClient, firestore: any) => {
  const dupeNames = await pg.manyOrNone<{
    name_fts: string
    name: string
    slug: string
    id: string
    importance_score: number
    // ROW_NUMBER() is a bigint; presumably the driver may hand it back as a
    // string, so it is normalized with Number() before comparing.
    rank: number | string
  }>(
    `SELECT
name_fts, name, slug, id, importance_score,
ROW_NUMBER() OVER(PARTITION BY name_fts ORDER BY importance_score DESC, total_members DESC) AS rank
FROM groups
WHERE
privacy_status = 'public'
and name_fts in (
select name_fts
from groups
where privacy_status = 'public'
and name_fts != ''
and name not in (
'Anime',
'Animals',
'Animation',
'Avatars',
'Avatar',
'Disney+',
'Disney',
'Curling',
'Curl',
'Musicals',
'Personal',
'Personality',
'Production',
'Productivity',
'Products',
'tests'
)
group by name_fts
having count(*) > 1
)`
  )

  const isTopRanked = (group: { rank: number | string }) =>
    Number(group.rank) === 1

  // First pass: index the rank-1 group of every name_fts. The query has no
  // ORDER BY, so the rows are not guaranteed to arrive grouped by name_fts
  // with the rank-1 row first; indexing up front removes that assumption.
  const topByNameFts = new Map<string, (typeof dupeNames)[number]>()
  for (const group of dupeNames) {
    if (isTopRanked(group)) {
      topByNameFts.set(group.name_fts, group)
    }
  }

  // Second pass: merge every lower-ranked group into the top group of its name_fts.
  for (const group of dupeNames) {
    if (isTopRanked(group)) continue

    const top = topByNameFts.get(group.name_fts)
    if (!top) {
      // Every partition has a rank-1 row, so this is not expected to happen.
      console.log('skipping', group.slug, '(no top-ranked group found)')
      continue
    }

    console.log('merge', group.slug, top.slug)
    await mergeGroups(pg, firestore, group.slug, top.slug)
  }
}

// Command-line entry point: runs only when this file is executed directly.
if (require.main === module) {
  runScript(async (ctx) => {
    const { pg, firestore } = ctx
    await mergeAllDupes(pg, firestore)
  })
}
147 changes: 147 additions & 0 deletions backend/scripts/merge-groups.ts
@@ -0,0 +1,147 @@
import { SafeBulkWriter } from 'shared/safe-bulk-writer'
import { type SupabaseDirectClient } from 'shared/supabase/init'
import { bulkUpsert } from 'shared/supabase/utils'
import { runScript } from 'run-script'
import { upsertGroupEmbedding } from 'shared/helpers/embeddings'

// Note: when merging a large number of contracts, turn off the on-update-contract trigger (notifications, embedding recalculation) first.

/**
 * Merges the group with slug `fromSlug` into the group with slug `toSlug` and
 * deletes the source group afterwards.
 *
 * In order: repoints `old_posts`, copies the source group's `group_contracts`
 * rows onto the target and deletes the originals, refreshes the group links on
 * the affected contracts, recalculates the target's group embedding, copies
 * `group_members` rows onto the target, recomputes the target's
 * `total_members`, and finally deletes the source's member rows and the
 * source group row.
 *
 * Does nothing when both slugs are equal. The statements are issued one after
 * another on `pg`; no transaction is opened here.
 *
 * @param pg Postgres client that runs every statement.
 * @param firestore Passed through to `updateGroupLinksOnContracts`.
 * @param fromSlug Slug of the group to merge away (its row is deleted at the end).
 * @param toSlug Slug of the group that receives the posts, contracts and members.
 */
export async function mergeGroups(
  pg: SupabaseDirectClient,
  firestore: any,
  fromSlug: string,
  toSlug: string
) {
  // Merging a group into itself would end by deleting it, so bail out early.
  if (fromSlug === toSlug) {
    return
  }

  const from = await pg.one(
    'select id from groups where slug = $1',
    [fromSlug],
    (row) => row.id
  )

  const to = await pg.one(
    'select id from groups where slug = $1',
    [toSlug],
    (row) => row.id
  )

  console.log(`merging ${from} into ${to}`)

  console.log('update posts')
  await pg.none('update old_posts set group_id = $1 where group_id = $2', [
    to,
    from,
  ])

  const contracts: string[] = await pg.map(
    'select contract_id from group_contracts where group_id = $1',
    [from],
    (row) => row.contract_id
  )

  // if (contracts.length > 100) {
  //   throw new Error(
  //     `found ${contracts.length} contracts in group ${from}. are you sure?`
  //   )
  // }

  if (contracts.length > 0) {
    console.log(`re-tagging ${contracts.length} contracts`)
    console.log(contracts)

    // Tag every contract with the target group first, then drop the old tags.
    await bulkUpsert(
      pg,
      'group_contracts',
      ['group_id', 'contract_id'],
      contracts.map((contract) => ({ group_id: to, contract_id: contract }))
    )

    console.log('removing old group contracts')
    await pg.none('delete from group_contracts where group_id = $1', [from])

    console.log('correcting contract group slugs')
    await updateGroupLinksOnContracts(pg, firestore, contracts)

    console.log('recalculating group embedding')
    await upsertGroupEmbedding(pg, to)
  } else {
    console.log('no contracts to re-tag')
  }

  // move members

  const members: string[] = await pg.map(
    'select member_id from group_members where group_id = $1',
    [from],
    (row) => row.member_id
  )

  console.log(`moving ${members.length} members`)

  // Skip the upsert for a memberless group, mirroring the contracts branch.
  // NOTE(review): assumes bulkUpsert may not accept an empty row list — confirm.
  if (members.length > 0) {
    await bulkUpsert(
      pg,
      'group_members',
      ['group_id', 'member_id'],
      members.map((member) => ({ group_id: to, member_id: member }))
    )
  }

  console.log('correcting group member count')

  await pg.none(
    'update groups set total_members = (select count(*) from group_members where group_id = $1) where id = $1',
    [to]
  )

  console.log('removing old group members')
  await pg.none('delete from group_members where group_id = $1', [from])
  console.log('removing old group')
  await pg.none('delete from groups where id = $1', [from])
}

/**
 * Rewrites the `groupSlugs` and `groupLinks` fields of the given contract
 * documents from the current `group_contracts` rows.
 *
 * For each contract id, the groups joined through `group_contracts` are read
 * (ordered by `importance_score`, descending) and an update for the matching
 * document in the `contracts` collection is queued on a `SafeBulkWriter`.
 * The writer is flushed once after all contracts have been queued.
 *
 * @param pg Postgres client used to read each contract's groups.
 * @param firestore Handle used to obtain the contract document references.
 * @param contractIds Ids of the contracts whose group fields should be rewritten.
 */
export async function updateGroupLinksOnContracts(
  pg: SupabaseDirectClient,
  firestore: any,
  contractIds: string[]
) {
  type ContractGroupRow = {
    group_id: string
    slug: string
    name: string
  }

  const groupsForContractSql = `select g.id as group_id, g.slug, g.name from groups g join group_contracts gc
on g.id = gc.group_id where gc.contract_id = $1
order by g.importance_score desc`

  const writer = new SafeBulkWriter()

  for (const id of contractIds) {
    const docRef = firestore.collection('contracts').doc(id)

    const rows = await pg.manyOrNone<ContractGroupRow>(groupsForContractSql, [
      id,
    ])

    const groupSlugs = rows.map((row) => row.slug)
    const groupLinks = rows.map((row) => {
      return { groupId: row.group_id, slug: row.slug, name: row.name }
    })

    writer.update(docRef, { groupSlugs, groupLinks })
  }

  await writer.flush()
}

// Command-line entry point: runs only when this file is executed directly.
if (require.main === module) {
  const hasBothSlugs = process.argv.length >= 4
  if (!hasBothSlugs) {
    console.error('usage: merge-groups.ts <from> <to>')
    process.exit(1)
  }

  runScript(async (ctx) => {
    const { pg, firestore } = ctx
    // argv[2] is the source slug, argv[3] the target slug.
    await mergeGroups(pg, firestore, process.argv[2], process.argv[3])
  })
}
9 changes: 6 additions & 3 deletions backend/shared/src/supabase/utils.ts
Expand Up @@ -71,9 +71,12 @@ export async function bulkUpsert<

const primaryKey = Array.isArray(idField) ? idField.join(', ') : idField
const upsertAssigns = cs.assignColumns({ from: 'excluded', skip: idField })
const query = `${baseQueryReplaced} on ${
onConflict ? onConflict : `conflict(${primaryKey})`
} do update set ${upsertAssigns}`
const query =
`${baseQueryReplaced} on ` +
(onConflict ? onConflict : `conflict(${primaryKey})`) +
' ' +
(upsertAssigns ? `do update set ${upsertAssigns}` : `do nothing`)

await db.none(query)
}

Expand Down

0 comments on commit 0195e9e

Please sign in to comment.