-
Notifications
You must be signed in to change notification settings - Fork 57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
bench(fdw): Latency #461
Comments
cc @xieydd |
We need to evaluate the performance difference of a Cloud RDS, such as supabase, over a local pgvecto.rs using the fdw scenario. A simple sample is provided below: #!/usr/bin/python
# ref: https://leileiluoluo.com/posts/postgres-foreign-data-wrappers.html
import psycopg2
import psycopg2.extras
import numpy as np
import time
DIMS = 10
LOCAL_DATABASE = "user=postgres.xxx password=xxx host=aws-0-us-west-1.pooler.supabase.com port=5432 dbname=postgres"
LOCAL_USER_DATABASE = "user=local_user.xxx password=secret host=aws-0-us-west-1.pooler.supabase.com port=5432 dbname=postgres"
FORIGEN_DATABASE = "dbname=tensorchord user=tensorchord host=xxx.modelz.tech port=5432 password=xxx sslmode=require"
repeated_array_str = ','.join(map(str, ["random()" for _ in range(DIMS)]))
random_array_str = ','.join(map(str, [np.random.random() for _ in range(DIMS)]))
local_conn = psycopg2.connect(LOCAL_DATABASE)
local_conn.autocommit = True
local_user_conn = psycopg2.connect(LOCAL_USER_DATABASE)
local_user_conn.autocommit = True
forigen_conn = psycopg2.connect(FORIGEN_DATABASE)
forigen_conn.autocommit = True
FDW_DDL = """
DROP TABLE IF EXISTS test;
CREATE TABLE test (id integer PRIMARY KEY, embedding vector({dims}) NOT NULL);
"""
LOCAL_DDL = """
DROP TABLE IF EXISTS local;
CREATE TABLE local (id integer PRIMARY KEY, name VARCHAR(50) NOT NULL);
"""
INSERT_LOCAL = "INSERT INTO local (id, name) VALUES (1, 'terry'), (2, 'jason'), (3, 'curry')"
INSERT_TEST = "INSERT INTO test SELECT i, ARRAY[{array}]::real[] FROM generate_series(1, 100) i;"
DELETE_TEST = "DELETE FROM test;"
CREATE_INDEX = 'CREATE INDEX ON test USING vectors (embedding vector_l2_ops) WITH (options = "[indexing.flat]");'
DELETE_INDEX = "DELETE FROM test WHERE i = {i};"
QUERY = "SELECT * FROM test ORDER BY embedding <-> '[{array}]' LIMIT 10;"
CREATE_FOREIGN_SERVER="""
CREATE SERVER foreign_server
FOREIGN DATA WRAPPER postgres_fdw
OPTIONS (host 'test-burntu0m01gami31.modelz.tech', port '5432', dbname 'tensorchord', sslmode 'require');
"""
CREATE_USER_MAPPER="""
CREATE USER MAPPING FOR local_user
SERVER foreign_server
OPTIONS (user 'fdw_user', password 'secret');
"""
CREATE_FOREIGN_TABLE="""
CREATE FOREIGN TABLE foreign_test (id integer, embedding extensions.vector({dims}) NOT NULL)
SERVER foreign_server
OPTIONS (schema_name 'public', table_name 'test');
"""
IMPORT_FOREIGN_SCHEMA="""
IMPORT FOREIGN SCHEMA public LIMIT TO (test)
FROM SERVER foreign_server
INTO public;
"""
with forigen_conn.cursor() as cursor:
# Create Table
cursor.execute(FDW_DDL.format(dims = DIMS))
cursor.execute(INSERT_TEST.format(array=repeated_array_str))
# Create User
cursor.execute("CREATE USER fdw_user WITH ENCRYPTED PASSWORD 'secret';")
cursor.execute("GRANT SELECT,INSERT,UPDATE,DELETE ON TABLE test TO fdw_user;")
# Create Index
cursor.execute(CREATE_INDEX)
# Vector Search (Test)
cursor.execute(QUERY.format(array=random_array_str))
results = cursor.fetchall()
print(results[0])
with local_conn.cursor() as cursor:
# Create local table
cursor.execute(LOCAL_DDL)
# Insert Data
cursor.execute(INSERT_LOCAL)
# Create local user and grant permission
cursor.execute("CREATE USER local_user WITH ENCRYPTED PASSWORD 'secret';")
cursor.execute("GRANT SELECT,INSERT,UPDATE,DELETE ON ALL TABLES IN SCHEMA public TO local_user;")
cursor.execute("GRANT ALL ON SCHEMA public TO local_user;")
cursor.execute("GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO local_user;")
# Create Foreign Server
cursor.execute(CREATE_FOREIGN_SERVER)
cursor.execute("GRANT USAGE ON FOREIGN SERVER foreign_server TO local_user;")
# Crate User Mapping
cursor.execute(CREATE_USER_MAPPER)
# Create Foreign Table
cursor.execute(CREATE_FOREIGN_TABLE.format(dims=DIMS))
cursor.execute("GRANT SELECT,INSERT,UPDATE,DELETE ON ALL TABLES IN SCHEMA extensions TO local_user;")
cursor.execute('SET search_path = "$user", public, extensions;')
# Join query
with local_user_conn.cursor() as local_user_cursor:
local_user_cursor.execute("SELECT l.id, l.name FROM local l LEFT JOIN foreign_test f on l.id = f.id ORDER BY f.embedding <-> '[0.40671515, 0.24202824, 0.37059402, 0.50316447, 0.10779921, 0.80774295, 0.8879849, 0.31292745, 0.05584943, 0.8738258]' LIMIT 10;")
results = local_user_cursor.fetchall()
print(results[0])
|
@xieydd I tried this example but it didn't work. I'm not sure how to make it work when remote has the pgvector extension |
This requires https://github.com/tensorchord/pgvecto.rs-enterprise/pull/8 The latency is mainly affected by the network latency. When testing within AWS US-west2a, the latency is about several milliseconds. remotecreate extension vectors;
set search_path = extensions, public;
create table if not exists remote (
id serial primary key,
vec vector(64) not null
);
insert into remote (vec) select array_fill(random(), array[64])::real[] from generate_series(1, 100000);
CREATE INDEX ON remote USING vectors (vec vector_l2_ops);
select id, vec <-> array_fill(0.5, array[64])::real[] as rank from remote order by rank limit 10;
explain (analyze, verbose) select id, vec <-> array_fill(0.5, array[64])::real[] as rank from remote order by rank limit 10;
CREATE USER fdw_user WITH ENCRYPTED PASSWORD 'secret';
GRANT all on schema extensions to fdw_user;
GRANT all on all tables in schema extensions to fdw_user; localcreate extension postgres_fdw;
create extension vector with schema extensions;
set search_path = extensions, public;
create server remote_server foreign data wrapper postgres_fdw options (
host 'xxx.pooler.supabase.com',
dbname 'postgres',
port '5432',
extensions 'vector'
);
create user mapping for postgres server remote_server options (user 'fdw_user', password 'secret');
import foreign schema extensions limit to (remote) from server remote_server into public;
# the type here is a bit different
select id, vec <-> array_fill(0.5, array[64])::vector as rank from remote order by rank limit 10;
explain (analyze, verbose) select id, vec <-> array_fill(0.5, array[64])::vector as rank from remote order by rank limit 10; |
The text was updated successfully, but these errors were encountered: