ray-project · devin-petersohn · Apr 11, 2019 · Apr 15, 2019 · Apr 15, 2019
diff --git a/exercises/exercise02-Task_Dependencies.ipynb b/exercises/exercise02-Task_Dependencies.ipynb
@@ -227,7 +227,7 @@
     "from bs4 import BeautifulSoup\n",
     "import requests\n",
     "\n",
-    "import pandas as pd"
+    "import modin.pandas as pd"
    ]
   },
   {
@@ -250,22 +250,21 @@
     "\n",
     "def parse_google_response(keyword, response):\n",
     "    soup = BeautifulSoup(response.text, 'lxml')\n",
-    "    df = pd.DataFrame(columns=['title', 'link', 'description'])\n",
+    "    data = {'title': [], 'link': [], 'description': [], 'library': []}\n",
     "    for g in soup.find_all(class_='g'):\n",
     "        entry = {}\n",
     "        headers = g.find_all(class_='r')\n",
     "        # Sometimes results have no headers\n",
     "        if len(headers) != 1:\n",
     "            continue\n",
-    "        entry['title'] = headers[0].text\n",
-    "        entry['link'] = headers[0].find('a').get('href')[7:]\n",
+    "        data['title'].append(headers[0].text)\n",
+    "        data['link'].append(headers[0].find('a').get('href')[7:])\n",
     "        description = g.find_all(class_='st')\n",
-    "        entry['description'] = description[0].text if len(description) > 0 else \"\"\n",
-    "        df = df.append(pd.DataFrame(entry, index=[0]))\n",
-    "    \n",
-    "    df['library'] = keyword\n",
-    "    return df\n",
+    "        data['description'].append(description[0].text if len(description) > 0 else \"\")\n",
+    "        data['library'].append(keyword)\n",
+    "    return data\n",
     "\n",
+    "# @ray.remote\n",
     "def get_results(keyword):\n",
     "    response = query_google(\"learn {}\".format(keyword))\n",
     "    return parse_google_response(keyword, response)"
@@ -285,13 +284,13 @@
    "outputs": [],
    "source": [
     "start = time.time()\n",
-    "keywords = [\"ray\", \"rllib\", \"tune\", \"modin\", \"plasma\", \"arrow\"]\n",
+    "keywords = [\"ray\", \"rllib\", \"tune\", \"modin.pandas\", \"plasma\", \"arrow\"]\n",
     "results = []\n",
     "for keyword in keywords:\n",
-    "    df = get_results(keyword)\n",
-    "    results.append(df)\n",
-    "    \n",
-    "df = pd.concat(results)\n",
+    "    data = get_results(keyword)\n",
+    "    results.append(data)\n",
+    "\n",
+    "df = pd.concat([pd.DataFrame(r) for r in results])\n",
     "duration = time.time() - start\n",
     "print(\"Constructing the dataframe took {} seconds.\".format(duration))"
    ]
@@ -311,6 +310,14 @@
    "source": [
     "df"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**<center>The results of the queries are now stored in a Modin DataFrame (`df`), a DataFrame whose API is identical to that of pandas.</center>**\n",
+    "**<center>Learn more about using the Modin DataFrame in the `examples/accelerate_pandas.ipynb` tutorial.</center>**"
+   ]
   }
  ],
  "metadata": {
@@ -329,7 +336,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.1"
+   "version": "3.6.7"
   },
   "toc": {
    "base_numbering": 1,