Skip to content

Commit

Permalink
ENH: to_sql() add parameter "method". Improve docs based on reviews (p…
Browse files Browse the repository at this point in the history
  • Loading branch information
schettino72 committed Jun 10, 2018
1 parent 21e8c04 commit 1e5d1cc
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 50 deletions.
51 changes: 51 additions & 0 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4752,6 +4752,57 @@ default ``Text`` type for string columns:
Because of this, reading the database table back in does **not** generate
a categorical.

.. _io.sql.method:

Insertion Method
++++++++++++++++

.. versionadded:: 0.24.0

The parameter ``method`` controls the SQL insertion clause used.

Possible values are:

- `'default'`: Uses standard SQL `INSERT` clause (one per row).
- `'multi'`: Pass multiple values in a single `INSERT` clause.
It uses a **special** SQL syntax not supported by all backends.
This usually provides better performance for Analytic databases
like *Presto* and *Redshit*, but has worse performance for

This comment has been minimized.

Copy link
@nirmit

nirmit Sep 26, 2018

You mean Redshift, right?

traditional SQL backend if the table contains many columns.
For more information check SQLAlchemy `documention
<http://docs.sqlalchemy.org/en/latest/core/dml.html?highlight=multivalues#sqlalchemy.sql.expression.Insert.values.params.*args>`__.
- callable: with signature `(pd_table, conn, keys, data_iter)`.
This can be used to implement more performant insertion based on
specific backend dialect features.
I.e. using *Postgresql* `COPY clause
<https://www.postgresql.org/docs/current/static/sql-copy.html>`__.

Example of callable for Postgresql *COPY*::

# Alternative to_sql() *method* for DBs that support COPY FROM
import csv
from io import StringIO

def psql_insert_copy(table, conn, keys, data_iter):
# gets a DBAPI connection that can provide a cursor
dbapi_conn = conn.connection
with dbapi_conn.cursor() as cur:
s_buf = StringIO()
writer = csv.writer(s_buf)
writer.writerows(data_iter)
s_buf.seek(0)

columns = ', '.join('"{}"'.format(k) for k in keys)
if table.schema:
table_name = '{}.{}'.format(table.schema, table.name)
else:
table_name = table.name

sql = 'COPY {} ({}) FROM STDIN WITH CSV'.format(
table_name, columns)
cur.copy_expert(sql=sql, file=s_buf)


Reading Tables
''''''''''''''

Expand Down
57 changes: 9 additions & 48 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2055,6 +2055,15 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True,
method : {'default', 'multi', callable}, default 'default'
Controls the SQL insertion clause used.
* `'default'`: Uses standard SQL `INSERT` clause (one per row).
* `'multi'`: Pass multiple values in a single `INSERT` clause.
* callable: with signature `(pd_table, conn, keys, data_iter)`.
Details and a sample callable implementation on
section :ref:`insert method <io.sql.method>`.
.. versionadded:: 0.24.0
Raises
------
ValueError
Expand Down Expand Up @@ -2122,54 +2131,6 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True,
>>> engine.execute("SELECT * FROM integers").fetchall()
[(1,), (None,), (2,)]
Insertion method:
.. versionadded:: 0.24.0
The parameter ``method`` controls the SQL insertion clause used.
Possible values are:
- `'default'`: Uses standard SQL `INSERT` clause
- `'multi'`: Pass multiple values in a single `INSERT` clause.
It uses a **special** SQL syntax not supported by all backends.
This usually provides a big performance for Analytic databases
like *Presto* and *Redshit*, but has worse performance for
traditional SQL backend if the table contains many columns.
For more information check SQLAlchemy `documention <http://docs.sqlalchemy.org/en/latest/core/dml.html?highlight=multivalues#sqlalchemy.sql.expression.Insert.values.params.*args>`__.
- callable: with signature `(pd_table, conn, keys, data_iter)`.
This can be used to implement more performant insertion based on
specific backend dialect features.
I.e. using *Postgresql* `COPY clause
<https://www.postgresql.org/docs/current/static/sql-copy.html>`__.
Check API for details and a sample implementation
:func:`~pandas.DataFrame.to_sql`.
Example of callable for Postgresql *COPY*::
# Alternative to_sql() *method* for DBs that support COPY FROM
import csv
from io import StringIO
def psql_insert_copy(table, conn, keys, data_iter):
# gets a DBAPI connection that can provide a cursor
dbapi_conn = conn.connection
with dbapi_conn.cursor() as cur:
s_buf = StringIO()
writer = csv.writer(s_buf)
writer.writerows(data_iter)
s_buf.seek(0)
columns = ', '.join('"{}"'.format(k) for k in keys)
if table.schema:
table_name = '{}.{}'.format(table.schema, table.name)
else:
table_name = table.name
sql = 'COPY {} ({}) FROM STDIN WITH CSV'.format(
table_name, columns)
cur.copy_expert(sql=sql, file=s_buf)
"""
from pandas.io import sql
sql.to_sql(self, name, con, schema=schema, if_exists=if_exists,
Expand Down
30 changes: 28 additions & 2 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,14 @@ def to_sql(frame, name, con, schema=None, if_exists='fail', index=True,
method : {'default', 'multi', callable}, default 'default'
Controls the SQL insertion clause used.
* `'default'`: Uses standard SQL `INSERT` clause (one per row).
* `'multi'`: Pass multiple values in a single `INSERT` clause.
* callable: with signature `(pd_table, conn, keys, data_iter)`.
Details and a sample callable implementation on
section :ref:`insert method <io.sql.method>`.
.. versionadded:: 0.24.0
"""
if if_exists not in ('fail', 'replace', 'append'):
raise ValueError("'{0}' is not valid for if_exists".format(if_exists))
Expand Down Expand Up @@ -635,10 +643,10 @@ def insert_data(self):

return column_names, data_list

def insert(self, chunksize=None, method=None):
def insert(self, chunksize=None, method='default'):

# set insert method
if method in (None, 'default'):
if method == 'default':
exec_insert = self._execute_insert
elif method == 'multi':
exec_insert = self._execute_insert_multi
Expand Down Expand Up @@ -1142,6 +1150,15 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
single value can be used.
method : {'default', 'multi', callable}, default 'default'
Controls the SQL insertion clause used.
* `'default'`: Uses standard SQL `INSERT` clause (one per row).
* `'multi'`: Pass multiple values in a single `INSERT` clause.
* callable: with signature `(pd_table, conn, keys, data_iter)`.
Details and a sample callable implementation on
section :ref:`insert method <io.sql.method>`.
.. versionadded:: 0.24.0
"""
if dtype and not is_dict_like(dtype):
dtype = {col_name: dtype for col_name in frame}
Expand Down Expand Up @@ -1499,6 +1516,15 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
can be used.
method : {'default', 'multi', callable}, default 'default'
Controls the SQL insertion clause used.
* `'default'`: Uses standard SQL `INSERT` clause (one per row).
* `'multi'`: Pass multiple values in a single `INSERT` clause.
* callable: with signature `(pd_table, conn, keys, data_iter)`.
Details and a sample callable implementation on
section :ref:`insert method <io.sql.method>`.
.. versionadded:: 0.24.0
"""
if dtype and not is_dict_like(dtype):
dtype = {col_name: dtype for col_name in frame}
Expand Down

0 comments on commit 1e5d1cc

Please sign in to comment.