Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

The Merge: add scripts to merge and delete groups #2527

Merged
merged 9 commits into from Mar 12, 2024
Merged
49 changes: 49 additions & 0 deletions backend/scripts/delete-group.ts
@@ -0,0 +1,49 @@
import { updateGroupLinksOnContracts } from 'merge-groups'
import { runScript } from 'run-script'
import { SupabaseDirectClient } from 'shared/supabase/init'

/**
 * Deletes the group identified by `slug` together with the rows that point at it.
 *
 * Order of operations: clear `group_id` on `old_posts`, delete the group's
 * `group_contracts` rows (and refresh the group links stored on those
 * contracts), delete its `group_members` rows, then delete the `groups` row.
 *
 * @param pg direct Postgres client used for every statement
 * @param firestore handle forwarded to `updateGroupLinksOnContracts`
 * @param slug slug of the group to remove
 */
async function deleteGroup(
  pg: SupabaseDirectClient,
  firestore: any,
  slug: string
) {
  const targetId = await pg.one(
    'select id from groups where slug = $1',
    [slug],
    ({ id }) => id
  )
  // Every remaining statement is parameterised by the group id alone.
  const byGroup = [targetId]

  console.log('removing group from posts')
  await pg.none(
    'update old_posts set group_id = null where group_id = $1',
    byGroup
  )

  // Capture the affected contract ids before the link rows disappear.
  const taggedContractIds = await pg.map(
    'select contract_id from group_contracts where group_id = $1',
    byGroup,
    ({ contract_id }) => contract_id
  )

  const hasContracts = taggedContractIds.length > 0
  if (hasContracts) {
    console.log('removing group from contracts')
    await pg.none('delete from group_contracts where group_id = $1', byGroup)
    console.log('correcting contract group slugs')
    await updateGroupLinksOnContracts(pg, firestore, taggedContractIds)
  }

  console.log('removing group members')
  await pg.none('delete from group_members where group_id = $1', byGroup)
  console.log('deleting group')
  await pg.none('delete from groups where id = $1', byGroup)
}

// Script entry point: runs only when this file is executed directly.
if (require.main === module) {
  // argv[0] and argv[1] are the runtime and script path; argv[2] is the slug.
  const [, , slug] = process.argv
  if (slug === undefined) {
    console.error('usage: delete-group.ts <group slug>')
    process.exit(1)
  }

  runScript(({ pg, firestore }) => deleteGroup(pg, firestore, slug))
}
62 changes: 62 additions & 0 deletions backend/scripts/merge-all-dupe-names.ts
@@ -0,0 +1,62 @@
import { mergeGroups } from 'merge-groups'
import { runScript } from 'run-script'
import { SupabaseDirectClient } from 'shared/supabase/init'

/**
 * Merges every set of public groups sharing the same `name_fts` into the
 * top-ranked group of that set.
 *
 * The query ranks the groups of each `name_fts` by `importance_score` and then
 * `total_members` (both descending); the rank-1 group is the survivor and every
 * other group with that `name_fts` is merged into it via `mergeGroups`.
 * Merges run one at a time.
 *
 * @param pg direct Postgres client used for the query and passed to `mergeGroups`
 * @param firestore handle passed through to `mergeGroups`
 */
const mergeAllDupes = async (pg: SupabaseDirectClient, firestore: any) => {
  type DupeRow = {
    name_fts: string
    slug: string
    id: string
    importance_score: number
    // ROW_NUMBER() is a bigint in Postgres; NOTE(review): depending on the
    // driver's type parsers it may arrive as a string rather than a number.
    rank: number | string
  }

  const dupeNames = await pg.manyOrNone<DupeRow>(
    `SELECT
name_fts, name, slug, id, importance_score,
ROW_NUMBER() OVER(PARTITION BY name_fts ORDER BY importance_score DESC, total_members DESC) AS rank
FROM groups
WHERE
privacy_status = 'public'
and name_fts in (
select name_fts
from groups
where privacy_status = 'public'
and name_fts != ''
and name not in (
'Anime',
'Animals',
'Animation',
'Avatars',
'Avatar',
'Disney+',
'Disney',
'Curling',
'Curl',
'Musicals',
'Personal',
'Personality',
'Production',
'Productivity',
'Products',
'tests'
)
group by name_fts
having count(*) > 1
)`
  )

  // The query has no ORDER BY, so rows of one name_fts are not guaranteed to
  // arrive together or with rank 1 first. Index the survivors up front so the
  // merge pass does not depend on row order.
  const topByName = new Map<string, DupeRow>()
  for (const group of dupeNames) {
    if (Number(group.rank) === 1) {
      topByName.set(group.name_fts, group)
    }
  }

  for (const group of dupeNames) {
    const top = topByName.get(group.name_fts)
    // Skip the survivor itself (and, defensively, any name with no rank-1 row).
    if (top === undefined || top.id === group.id) {
      continue
    }
    console.log('merge', group.slug, top.slug)
    // Sequential on purpose: each merge mutates shared tables.
    await mergeGroups(pg, firestore, group.slug, top.slug)
  }
}

// Script entry point: runs only when this file is executed directly.
if (require.main === module) {
  runScript(({ pg, firestore }) => mergeAllDupes(pg, firestore))
}
147 changes: 147 additions & 0 deletions backend/scripts/merge-groups.ts
@@ -0,0 +1,147 @@
import { SafeBulkWriter } from 'shared/safe-bulk-writer'
import { type SupabaseDirectClient } from 'shared/supabase/init'
import { bulkUpsert } from 'shared/supabase/utils'
import { runScript } from 'run-script'
import { upsertGroupEmbedding } from 'shared/helpers/embeddings'

// Note: when merging a group with a very large number of contracts, turn off the on-update-contract trigger first (it drives notifications and embedding recalculation).

/**
 * Merges the group with slug `fromSlug` into the group with slug `toSlug`,
 * then deletes the source group.
 *
 * Steps, in order:
 *  1. re-point `old_posts` rows from the source group to the target group;
 *  2. upsert the source group's contracts onto the target, delete the source's
 *     `group_contracts` rows, refresh the group links stored on those
 *     contracts, and recompute the target group's embedding;
 *  3. upsert the source group's members onto the target and recount the
 *     target's `total_members`;
 *  4. delete the source group's `group_members` rows and the source `groups` row.
 *
 * Does nothing when both slugs are identical. The statements are issued
 * individually, not inside a transaction.
 *
 * @param pg direct Postgres client used for every statement
 * @param firestore handle forwarded to `updateGroupLinksOnContracts`
 * @param fromSlug slug of the group that will be merged away and deleted
 * @param toSlug slug of the group that survives
 */
export async function mergeGroups(
  pg: SupabaseDirectClient,
  firestore: any,
  fromSlug: string,
  toSlug: string
) {
  // Merging a group into itself would end by deleting it; bail out early.
  if (fromSlug === toSlug) {
    return
  }

  const from = await pg.one(
    'select id from groups where slug = $1',
    [fromSlug],
    (row) => row.id
  )

  const to = await pg.one(
    'select id from groups where slug = $1',
    [toSlug],
    (row) => row.id
  )

  console.log(`merging ${from} into ${to}`)

  console.log('update posts')
  await pg.none('update old_posts set group_id = $1 where group_id = $2', [
    to,
    from,
  ])

  const contracts: string[] = await pg.map(
    'select contract_id from group_contracts where group_id = $1',
    [from],
    (row) => row.contract_id
  )

  // Optional safety valve, disabled by default:
  // if (contracts.length > 100) {
  //   throw new Error(
  //     `found ${contracts.length} contracts in group ${from}. are you sure?`
  //   )
  // }

  if (contracts.length > 0) {
    console.log(`re-tagging ${contracts.length} contracts`)
    console.log(contracts)

    // Upsert so contracts already tagged with the target group don't conflict.
    await bulkUpsert(
      pg,
      'group_contracts',
      ['group_id', 'contract_id'],
      contracts.map((contract) => ({ group_id: to, contract_id: contract }))
    )

    console.log('removing old group contracts')
    await pg.none('delete from group_contracts where group_id = $1', [from])

    console.log('correcting contract group slugs')
    await updateGroupLinksOnContracts(pg, firestore, contracts)

    console.log('recalculating group embedding')
    await upsertGroupEmbedding(pg, to)
  } else {
    console.log('no contracts to re-tag')
  }

  // move members

  const members: string[] = await pg.map(
    'select member_id from group_members where group_id = $1',
    [from],
    (row) => row.member_id
  )

  console.log(`moving ${members.length} members`)

  // Mirror the contracts guard: never hand bulkUpsert an empty batch, so a
  // memberless source group cannot abort the merge before cleanup.
  if (members.length > 0) {
    await bulkUpsert(
      pg,
      'group_members',
      ['group_id', 'member_id'],
      members.map((member) => ({ group_id: to, member_id: member }))
    )
  }

  console.log('correcting group member count')

  // Recount instead of adding, because members present in both groups were
  // upserted rather than duplicated.
  await pg.none(
    'update groups set total_members = (select count(*) from group_members where group_id = $1) where id = $1',
    [to]
  )

  console.log('removing old group members')
  await pg.none('delete from group_members where group_id = $1', [from])
  console.log('removing old group')
  await pg.none('delete from groups where id = $1', [from])
}

/**
 * Rewrites `groupSlugs` and `groupLinks` on each given contract document so
 * they match the contract's current rows in `group_contracts`.
 *
 * For every contract id, the linked groups are read from Postgres (ordered by
 * `importance_score` descending) and queued as an update on the matching
 * document in the `contracts` collection. Queued writes are flushed once at
 * the end.
 *
 * @param pg direct Postgres client used to look up each contract's groups
 * @param firestore handle used to resolve `contracts/<id>` document refs
 * @param contractIds ids of the contracts whose group links should be rebuilt
 */
export async function updateGroupLinksOnContracts(
  pg: SupabaseDirectClient,
  firestore: any,
  contractIds: string[]
) {
  type LinkedGroup = { group_id: string; slug: string; name: string }

  const writer = new SafeBulkWriter()

  for (const id of contractIds) {
    const docRef = firestore.collection('contracts').doc(id)

    const linked = await pg.manyOrNone<LinkedGroup>(
      `select g.id as group_id, g.slug, g.name from groups g join group_contracts gc
on g.id = gc.group_id where gc.contract_id = $1
order by g.importance_score desc`,
      [id]
    )

    // Build both denormalised fields in a single pass over the rows.
    const groupSlugs: string[] = []
    const groupLinks: { groupId: string; slug: string; name: string }[] = []
    for (const { group_id, slug, name } of linked) {
      groupSlugs.push(slug)
      groupLinks.push({ groupId: group_id, slug, name })
    }

    writer.update(docRef, { groupSlugs, groupLinks })
  }

  await writer.flush()
}

// Script entry point: runs only when this file is executed directly.
if (require.main === module) {
  // argv[0] and argv[1] are the runtime and script path; the slugs follow.
  const [, , fromSlug, toSlug] = process.argv
  if (fromSlug === undefined || toSlug === undefined) {
    console.error('usage: merge-groups.ts <from> <to>')
    process.exit(1)
  }

  runScript(({ pg, firestore }) => mergeGroups(pg, firestore, fromSlug, toSlug))
}
9 changes: 6 additions & 3 deletions backend/shared/src/supabase/utils.ts
Expand Up @@ -71,9 +71,12 @@ export async function bulkUpsert<

const primaryKey = Array.isArray(idField) ? idField.join(', ') : idField
const upsertAssigns = cs.assignColumns({ from: 'excluded', skip: idField })
const query = `${baseQueryReplaced} on ${
onConflict ? onConflict : `conflict(${primaryKey})`
} do update set ${upsertAssigns}`
const query =
`${baseQueryReplaced} on ` +
(onConflict ? onConflict : `conflict(${primaryKey})`) +
' ' +
(upsertAssigns ? `do update set ${upsertAssigns}` : `do nothing`)

await db.none(query)
}

Expand Down