yhat · kljh · Apr 5, 2019 · Apr 7, 2019
diff --git a/README.md b/README.md
@@ -1,8 +1,8 @@
 pandasql
 ========
 
-`pandasql` allows you to query `pandas` DataFrames using SQL syntax. It works 
-similarly to `sqldf` in R. `pandasql` seeks to provide a more familiar way of 
+`pandasql` allows you to query `pandas` DataFrames using SQL syntax. It works
+similarly to `sqldf` in R. `pandasql` seeks to provide a more familiar way of
 manipulating and cleaning data for people new to Python or `pandas`.
 
 #### Installation
@@ -15,15 +15,15 @@ The main function used in pandasql is `sqldf`. `sqldf` accepts 2 parametrs
    - a sql query string
    - a set of session/environment variables (`locals()` or `globals()`)
 
-Specifying `locals()` or `globals()` can get tedious. You can define a short 
+Specifying `locals()` or `globals()` can get tedious. You can define a short
 helper function to fix this.
 
     from pandasql import sqldf
-    pysqldf = lambda q: sqldf(q, globals())
+    pysqldf = lambda q, params=None: sqldf(q, globals(), params=params)
 
 #### Querying
-`pandasql` uses [SQLite syntax](http://www.sqlite.org/lang.html). Any `pandas` 
-dataframes will be automatically detected by `pandasql`. You can query them as 
+`pandasql` uses [SQLite syntax](http://www.sqlite.org/lang.html). Any `pandas`
+dataframes will be automatically detected by `pandasql`. You can query them as
 you would any regular SQL table.
 
 
@@ -76,9 +76,19 @@ joins and aggregations are also supported
 4  1948        8766
 ```
 
+queries with parameters are supported
+```
+>>> iris = load_iris()
+>>> iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
+>>> print pysqldf("SELECT DISTINCT species FROM iris_df WHERE species <> ? ", params=('versicolor',) )
+     species
+0     setosa
+1  virginica
+```
+
 More information and code samples available in the [examples](https://github.com/yhat/pandasql/blob/master/examples/demo.py)
  folder or on [our blog](http://blog.yhathq.com/posts/pandasql-sql-for-pandas-dataframes.html).
 
 
 
-[![Analytics](https://ga-beacon.appspot.com/UA-46996803-1/pandasql/README.md)](https://github.com/yhat/pandasql)    
+[![Analytics](https://ga-beacon.appspot.com/UA-46996803-1/pandasql/README.md)](https://github.com/yhat/pandasql)
diff --git a/examples/demo2.py b/examples/demo2.py
@@ -0,0 +1,25 @@
+import os, time
+import pandas as pd
+from pandasql import sqldf
+
+# dummy DataFrame
+data = [ [ "abc", 123, True, "C:\\temp" ], [ "d'ef", -45.6, False, "C:\\windows" ], [ "xyz", 0.89, 0, "/usr/" ] ]
+df = pd.DataFrame(data, columns = [ "id", "n", "b", "f" ])
+
+
+# define 'pysqldf' as per pandasql documentation, with extra params and user-defined-functions registration
+
+def my_sqlite_connect_listener( dbapi_con, con_record ):
+    # registering a few extra functions to SQLite
+    dbapi_con.create_function( 'IIF', 3, lambda b, t, f : t if b else f )
+    dbapi_con.create_function( 'CUBE', 1, lambda x : x*x*x )
+    dbapi_con.create_function( 'FileExists', 1, lambda f : os.path.exists(f) )
+    dbapi_con.create_function( 'FileModificationDate', 1, lambda f : time.ctime(os.path.getmtime(f)) if os.path.exists(f) else None)
+
+pysqldf = lambda q, params=None: sqldf(q, globals(), params=params, sqlite_connect_listener=my_sqlite_connect_listener)
+
+
+# demo of request using the extra functions
+print(pysqldf("select n, IIF(n<0, 'n is negative', 'n is positive') from df where id<>?", params = ('abc', )))
+print(pysqldf("select CUBE(2), CUBE(3), CUBE(4), CUBE(5)"))
+print(pysqldf("select f, FileExists(f), FileModificationDate(f) from df"))
diff --git a/pandasql/sqldf.py b/pandasql/sqldf.py
@@ -15,7 +15,7 @@ class PandaSQLException(Exception):
 
 
 class PandaSQL:
-    def __init__(self, db_uri='sqlite:///:memory:', persist=False):
+    def __init__(self, db_uri='sqlite:///:memory:', persist=False, sqlite_connect_listener=None):
         """
         Initialize with a specific database.
 
@@ -26,6 +26,8 @@ def __init__(self, db_uri='sqlite:///:memory:', persist=False):
 
         if self.engine.name == 'sqlite':
             listen(self.engine, 'connect', self._set_text_factory)
+        if self.engine.name == 'sqlite' and sqlite_connect_listener is not None:
+            listen(self.engine, 'connect', sqlite_connect_listener)
 
         if self.engine.name not in ('sqlite', 'postgresql'):
             raise PandaSQLException('Currently only sqlite and postgresql are supported.')
@@ -36,7 +38,7 @@ def __init__(self, db_uri='sqlite:///:memory:', persist=False):
             self._conn = self.engine.connect()
             self._init_connection(self._conn)
 
-    def __call__(self, query, env=None):
+    def __call__(self, query, env=None, params=None):
         """
         Execute the SQL query.
         Automatically creates tables mentioned in the query from dataframes before executing.
@@ -61,7 +63,7 @@ def __call__(self, query, env=None):
                 write_table(env[table_name], table_name, conn)
 
             try:
-                result = read_sql(query, conn)
+                result = read_sql(query, conn, params=params)
             except DatabaseError as ex:
                 raise PandaSQLException(ex)
             except ResourceClosedError:
@@ -126,7 +128,7 @@ def write_table(df, tablename, conn):
                index=not any(name is None for name in df.index.names))  # load index into db if all levels are named
 
 
-def sqldf(query, env=None, db_uri='sqlite:///:memory:'):
+def sqldf(query, env=None, db_uri='sqlite:///:memory:', persist=False, sqlite_connect_listener=None, params=None):
     """
     Query pandas data frames using sql syntax
     This function is meant for backward compatibility only. New users are encouraged to use the PandaSQL class.
@@ -158,4 +160,4 @@ def sqldf(query, env=None, db_uri='sqlite:///:memory:'):
     >>> sqldf("select * from df;", locals())
     >>> sqldf("select avg(x) from df;", locals())
     """
-    return PandaSQL(db_uri)(query, env)
+    return PandaSQL(db_uri, persist, sqlite_connect_listener)(query, env, params)