[Notebooks] Update hunting notebooks (#229)
zhzhao8888 committed Aug 15, 2023
1 parent 4e09def commit 42af024
Showing 2 changed files with 111 additions and 223 deletions.
@@ -3,7 +3,7 @@
{
"cell_type": "markdown",
"source": [
"# Hunting - Automated Data Query and Ingestion to Custom Table\n",
"# Hunting - Automated Data Query and MDTI API and Ingestion to Custom Table\n",
"\n",
"__Notebook Version:__ 1.0<br>\n",
"__Python Version:__ Python 3.8<br>\n",
@@ -32,6 +32,44 @@
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"%pip install azure.mgmt.loganalytics"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"%pip install azure.monitor.query"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
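
The import cell that follows also relies on LogsIngestionClient from azure.monitor.ingestion, which the two install cells above do not cover. A minimal companion install, assuming that package is not already present in the pool image:

    # Assumed companion install; skip if azure-monitor-ingestion ships with the environment
    %pip install azure-monitor-ingestion
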
{
"cell_type": "code",
"source": [
@@ -57,7 +95,7 @@
"# Load Python libraries that will be used in this notebook\n",
"from azure.mgmt.loganalytics import LogAnalyticsManagementClient\n",
"from azure.monitor.query import LogsQueryClient, MetricsQueryClient, LogsQueryStatus\n",
"#from azure.identity.aio import DefaultAzureCredential\n",
"##from azure.identity.aio import DefaultAzureCredential\n",
"from azure.monitor.ingestion import LogsIngestionClient\n",
"\n",
"from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential\n",
@@ -78,17 +116,13 @@
{
"cell_type": "code",
"source": [
"# User inputs\r\n",
"tenant_id = ''\r\n",
"subscription_id = ''\r\n",
"akv_name = ''\r\n",
"akv_link_name = ''\r\n",
"workspace_id = ''\r\n",
"client_id_name = ''\r\n",
"client_secret_name = ''\r\n",
"resource_group_name = \"\"\r\n",
"location = \"\"\r\n",
"workspace_name = ''"
"client_secret_name = ''"
],
"outputs": [],
"execution_count": null,
@@ -101,13 +135,15 @@
{
"cell_type": "code",
"source": [
"# User inputs\r\n",
"# Parameters for provisioning resources\"\r\n",
"workspace_resource_id = '/subscriptions/{subscription_id}}/resourceGroups/{resource_group_name}/providers/Microsoft.OperationalInsights/workspaces/{workspace_name}'\r\n",
"# Parameters for provisioning resources\r\n",
"resource_group_name = \"\"\r\n",
"location = \"\"\r\n",
"workspace_name = ''\r\n",
"workspace_resource_id = '/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.OperationalInsights/workspaces/{2}'.format(subscription_id, resource_group_name, workspace_name)\r\n",
"dataCollection_endpoint_name = \"\"\r\n",
"dataCollection_rule_name = \"\"\r\n",
"stream_name = \"\"\r\n",
"immutable_rule_id = \"\"\r\n",
"dataCollection_rule_name_for_enrichment = \"\"\r\n",
"stream_name_for_enrichment = \"\"\r\n",
"immutable_rule_id_for_enrichment = \"\"\r\n",
"dce_endpoint = ''"
],
"outputs": [],
@@ -174,7 +210,7 @@
"\r\n",
"end_time = datetime.now(timezone.utc)\r\n",
"start_time = end_time - timedelta(5)\r\n",
"query = \"DynamicSummary | where TimeGenerated > ago(3d) | project TimeGenerated, MyData = SummaryStatus\"\r\n",
"query = \"YOURTABLE | where TimeGenerated > ago(3d) | project TimeGenerated, Url\"\r\n",
"query_result = la_data_client.query_workspace(\r\n",
" workspace_id=workspace_id,\r\n",
" query=query,\r\n",
@@ -215,7 +251,7 @@
" return response\r\n",
"\r\n",
"def get_token_for_graph():\r\n",
" resource_uri = \"https://graph.microsoft.com/ThreatIntelligence.Read.All\"\r\n",
" resource_uri = \"https://graph.microsoft.com\"\r\n",
" client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)\r\n",
" client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)\r\n",
"\r\n",
@@ -243,10 +279,9 @@
{
"cell_type": "code",
"source": [
"# Calling Microsoft Sentinel Watchlist API\r\n",
"# If you don't have Watchlist, you may create one, or try to access different features, such as Bookmarks.\r\n",
"# Calling MDTI API, hosts as example\r\n",
"header_token_value = \"Bearer {}\".format(get_token_for_graph())\r\n",
"response_mdti = call_mdti_api_for_read(header_token_value, \"hosts('riskiq.net')\")"
"response_mdti_host = call_mdti_api_for_read(header_token_value, \"hosts('www.microsoft.com')\")"
],
"outputs": [],
"execution_count": null,
@@ -265,9 +300,10 @@
{
"cell_type": "code",
"source": [
"if response_mdti != None:\r\n",
" print(response_mdti)\r\n",
" #df_api_data = pd.DataFrame(response_watchlist.json()[\"value\"])"
"# Data process\r\n",
"df_host = pd.json_normalize(response_mdti_host.json())\r\n",
"df_merged = pd.merge(df_la_query, df_host[['id','firstSeenDateTime','registrar']], left_on='Url', right_on='id', how=\"outer\")\r\n",
"df_final = df_merged.rename(columns = {'TimeGenerated': 'TimeGenerated', 'Url': 'Url', 'registrar': 'Fact'})[['TimeGenerated', 'Url', 'Fact']]"
],
"outputs": [],
"execution_count": null,
@@ -284,44 +320,11 @@
}
},
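
The data-processing cell above flattens the MDTI host response with pd.json_normalize and outer-merges it onto the Log Analytics results, matching Url against id and keeping registrar under the name Fact. A self-contained sketch of the same pattern with made-up values, so the shape of df_final is visible without credentials or live API calls (the *_demo names are illustrative only):

    # Offline sketch of the normalize / merge / rename pattern used above (synthetic data)
    import pandas as pd

    df_la_query_demo = pd.DataFrame({
        "TimeGenerated": pd.to_datetime(["2023-08-14T10:00:00Z"]),
        "Url": ["www.microsoft.com"],
    })
    host_json_demo = {
        "id": "www.microsoft.com",
        "firstSeenDateTime": "2014-02-10T00:00:00Z",
        "registrar": "Example Registrar Inc.",
    }
    df_host_demo = pd.json_normalize(host_json_demo)
    df_merged_demo = pd.merge(
        df_la_query_demo,
        df_host_demo[["id", "firstSeenDateTime", "registrar"]],
        left_on="Url",
        right_on="id",
        how="outer",
    )
    df_final_demo = df_merged_demo.rename(columns={"registrar": "Fact"})[["TimeGenerated", "Url", "Fact"]]
    print(df_final_demo)
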
{
"cell_type": "code",
"cell_type": "markdown",
"source": [
"# Calling Microsoft Sentinel API for List, the same template can be used for calling other Azure REST APIs with different parameters.\r\n",
"# For different environments, such as national clouds, you may need to use different root_url, please contact with your admins.\r\n",
"# It can be ---.azure.us, ---.azure.microsoft.scloud, ---.azure.eaglex.ic.gov, etc.\r\n",
"def call_azure_rest_api_for_get_watchlist_items(token, resource_group_name, sentinel_workspace_name, resource_alias, api_version):\r\n",
" \"Calling Microsoft Sentinel REST API\"\r\n",
" headers = {\"Authorization\": token, \"content-type\":\"application/json\" }\r\n",
" provider_name = \"Microsoft.OperationalInsights\"\r\n",
" provider2_name = \"Microsoft.SecurityInsights\"\r\n",
" target_resource_name = \"watchlists\"\r\n",
" sub_target_source_name = \"watchlistItems\"\r\n",
" api_version = api_version\r\n",
" root_url = \"https://management.azure.com\"\r\n",
" arm_rest_url_template_for_list = \"{0}/subscriptions/{1}/resourceGroups/{2}/providers/{3}/workspaces/{4}/providers/{5}/{6}/{7}/{8}?api-version={9}\"\r\n",
" arm_rest_url = arm_rest_url_template_for_list.format(root_url, subscription_id, resource_group_name, provider_name, sentinel_workspace_name, provider2_name, target_resource_name, resource_alias, sub_target_source_name, api_version)\r\n",
" response = requests.get(arm_rest_url, headers=headers, verify=True)\r\n",
" return response\r\n",
"\r\n",
"def get_token_for_azure():\r\n",
" resource_uri = \"https://management.azure.com/\"\r\n",
" client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)\r\n",
" client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)\r\n",
"\r\n",
" credential = ClientSecretCredential(\r\n",
" tenant_id=tenant_id, \r\n",
" client_id=client_id, \r\n",
" client_secret=client_secret)\r\n",
" access_token = credential.get_token(resource_uri + \"/.default\")\r\n",
" return access_token[0]\r\n"
"## 3. Save result to Azure Log Analytics Custom Table"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
@@ -332,6 +335,7 @@
{
"cell_type": "code",
"source": [
"# function for data converting\r\n",
"def convert_dataframe_to_list_of_dictionaries(df, hasTimeGeneratedColumn):\r\n",
" list = df.to_dict('records')\r\n",
"\r\n",
@@ -340,7 +344,7 @@
" if hasTimeGeneratedColumn and str(row['TimeGenerated']) != \"NaT\":\r\n",
" row['TimeGenerated']= row['TimeGenerated'].strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\")\r\n",
" \r\n",
" return list\r\n"
" return list"
],
"outputs": [],
"execution_count": null,
@@ -359,10 +363,9 @@
{
"cell_type": "code",
"source": [
"# Calling Microsoft Sentinel Watchlist API\r\n",
"# If you don't have Watchlist, you may create one, or try to access different features, such as Bookmarks.\r\n",
"header_token_value = \"Bearer {}\".format(get_token_for_azure())\r\n",
"response_watchlist = call_azure_rest_api_for_get_watchlist_items(header_token_value, \"zhzhaopitest\", \"zhzhaoasi\", \"zz20220801\", \"2023-02-01\")"
"# Construct data body for LA data ingestion\r\n",
"list_final = convert_dataframe_to_list_of_dictionaries(df_final, True)\r\n",
"body = list_final"
],
"outputs": [],
"execution_count": null,
@@ -381,139 +384,11 @@
{
"cell_type": "code",
"source": [
"if response_watchlist != None:\r\n",
" df_api_data = pd.DataFrame(response_watchlist.json()[\"value\"])"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"df_union = pd.concat([df_la_query, df_api_data])\r\n",
"#display(df_union)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"final_result1 = convert_dataframe_to_list_of_dictionaries(df_la_query, True)\r\n",
"display(final_result1)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"final_result2 = convert_dataframe_to_list_of_dictionaries(df_api_data, False)\r\n",
"#display(final_result2)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"final_result = convert_dataframe_to_list_of_dictionaries(df_union, True)\r\n",
"#display(final_result)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 3. Save result to Azure Log Analytics Custom Table"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"from azure.core.exceptions import HttpResponseError\r\n",
"from azure.identity import DefaultAzureCredential\r\n",
"from azure.monitor.ingestion import LogsIngestionClient\r\n",
"\r\n",
"body = [\r\n",
" {\r\n",
" \"TimeGenerated \": \"2023-08-04T14:51:14.1104269Z\",\r\n",
" \"MyData\": \"Computer1\",\r\n",
" },\r\n",
" {\r\n",
" \"TimeGenerated \": \"2023-08-04T14:41:14.1104269Z\",\r\n",
" \"MyData\": \"Computer2\",\r\n",
" }\r\n",
" ]\r\n",
"\r\n",
"\r\n",
"# Data ingestion to LA custom table\r\n",
"client = LogsIngestionClient(endpoint=dce_endpoint, credential=credential, logging_enable=True)\r\n",
"\r\n",
"try:\r\n",
" ingestion_result = client.upload(rule_id=immutable_rule_id, stream_name=stream_name, logs=body)\r\n",
" ingestion_result = client.upload(rule_id=immutable_rule_id_for_enrichment, stream_name=stream_name_for_enrichment, logs=body)\r\n",
"except HttpResponseError as e:\r\n",
" print(f\"Upload failed: {e}\")"
],
@@ -533,7 +408,9 @@
},
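
The ingestion cell above pushes the prepared body through azure-monitor-ingestion against a data collection endpoint and a DCR stream. A standalone sketch of the same call, with placeholder endpoint, rule id, and stream name, and DefaultAzureCredential standing in for the notebook's credential object; the record keys must match the columns declared for the stream (here TimeGenerated, Url, Fact):

    # Standalone ingestion sketch; the endpoint, rule id, and stream name below are placeholders
    from azure.core.exceptions import HttpResponseError
    from azure.identity import DefaultAzureCredential
    from azure.monitor.ingestion import LogsIngestionClient

    dce_endpoint_demo = "https://my-dce.eastus-1.ingest.monitor.azure.com"  # data collection endpoint URI
    immutable_rule_id_demo = "dcr-00000000000000000000000000000000"         # immutable id of the DCR
    stream_name_demo = "Custom-Enrichment_CL"                               # stream declared in the DCR

    body_demo = [
        {"TimeGenerated": "2023-08-15T00:00:00.000000Z", "Url": "www.microsoft.com", "Fact": "Example Registrar Inc."},
    ]

    client_demo = LogsIngestionClient(endpoint=dce_endpoint_demo, credential=DefaultAzureCredential(), logging_enable=True)
    try:
        client_demo.upload(rule_id=immutable_rule_id_demo, stream_name=stream_name_demo, logs=body_demo)
    except HttpResponseError as e:
        print(f"Upload failed: {e}")
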
{
"cell_type": "code",
"source": [],
"source": [
"ingestion_result"
],
"outputs": [],
"execution_count": null,
"metadata": {
