From 852a23b845122f1c2c5739d3fc24f0d135834d86 Mon Sep 17 00:00:00 2001
From: sagar-salvi-apptware
 <159135491+sagar-salvi-apptware@users.noreply.github.com>
Date: Tue, 10 Sep 2024 22:54:55 +0530
Subject: [PATCH] feat(mode/ingest): Add support for missing Mode datasets in
 lineage (#11290)

---
 .../ingestion/source/common/subtypes.py       |   1 +
 .../src/datahub/ingestion/source/mode.py      | 128 +++++++++++----
 .../integration/mode/mode_mces_golden.json    | 119 ++++++++++++--
 .../mode/setup/dataset_24f66e1701b6.json      | 149 ++++++++++++++++++
 .../setup/dataset_queries_24f66e1701b6.json   |  64 ++++++++
 .../mode/setup/datasets_157933cc1168.json     |  10 ++
 .../mode/setup/datasets_75737b70402e.json     | 149 ++++++++++++++++++
 .../mode/setup/reports_75737b70402e.json      |  29 +++-
 .../tests/integration/mode/test_mode.py       |   4 +
 9 files changed, 615 insertions(+), 38 deletions(-)
 create mode 100644 metadata-ingestion/tests/integration/mode/setup/dataset_24f66e1701b6.json
 create mode 100644 metadata-ingestion/tests/integration/mode/setup/dataset_queries_24f66e1701b6.json
 create mode 100644 metadata-ingestion/tests/integration/mode/setup/datasets_157933cc1168.json
 create mode 100644 metadata-ingestion/tests/integration/mode/setup/datasets_75737b70402e.json

diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py
index fb22f0b6edde2..4bc120fbecf8f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py
@@ -72,6 +72,7 @@ class BIAssetSubTypes(StrEnum):
 
     # Mode
     MODE_REPORT = "Report"
+    MODE_DATASET = "Dataset"
     MODE_QUERY = "Query"
     MODE_CHART = "Chart"
 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py
index 47475c5825a49..73427d9084dd3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/mode.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py
@@ -106,7 +106,7 @@
     infer_output_schema,
 )
 from datahub.utilities import config_clean
-from datahub.utilities.lossy_collections import LossyDict, LossyList
+from datahub.utilities.lossy_collections import LossyList
 
 logger: logging.Logger = logging.getLogger(__name__)
 
@@ -199,10 +199,6 @@ class ModeSourceReport(StaleEntityRemovalSourceReport):
     num_query_template_render_failures: int = 0
     num_query_template_render_success: int = 0
 
-    dropped_imported_datasets: LossyDict[str, LossyList[str]] = dataclasses.field(
-        default_factory=LossyDict
-    )
-
     def report_dropped_space(self, ent_name: str) -> None:
         self.filtered_spaces.append(ent_name)
 
@@ -429,10 +425,25 @@ def construct_dashboard(
         # Last refreshed ts.
         last_refreshed_ts = self._parse_last_run_at(report_info)
 
+        # Datasets
+        datasets = []
+        for imported_dataset_name in report_info.get("imported_datasets", {}):
+            mode_dataset = self._get_request_json(
+                f"{self.workspace_uri}/reports/{imported_dataset_name.get('token')}"
+            )
+            dataset_urn = builder.make_dataset_urn_with_platform_instance(
+                self.platform,
+                str(mode_dataset.get("id")),
+                platform_instance=None,
+                env=self.config.env,
+            )
+            datasets.append(dataset_urn)
+
         dashboard_info_class = DashboardInfoClass(
             description=description if description else "",
             title=title if title else "",
             charts=self._get_chart_urns(report_token),
+            datasets=datasets if datasets else None,
             lastModified=last_modified,
             lastRefreshed=last_refreshed_ts,
             dashboardUrl=f"{self.config.connect_uri}/{self.config.workspace}/reports/{report_token}",
@@ -725,6 +736,10 @@ def _get_platform_and_dbname(
                     data_source.get("adapter", ""), data_source.get("name", "")
                 )
                 database = data_source.get("database", "")
+                # This is hacky, but on BigQuery we replace the database when it is "default".
+                # For lineage we need project_id.db.table, so the data source's host is used instead.
+                if platform == "bigquery" and database == "default":
+                    database = data_source.get("host", "")
                 return platform, database
         else:
             self.report.report_warning(
@@ -900,24 +915,36 @@ def normalize_mode_query(self, query: str) -> str:
 
         return rendered_query
 
-    def construct_query_from_api_data(
+    def construct_query_or_dataset(
         self,
         report_token: str,
         query_data: dict,
         space_token: str,
         report_info: dict,
+        is_mode_dataset: bool,
     ) -> Iterable[MetadataWorkUnit]:
-        query_urn = self.get_dataset_urn_from_query(query_data)
+        query_urn = (
+            self.get_dataset_urn_from_query(query_data)
+            if not is_mode_dataset
+            else self.get_dataset_urn_from_query(report_info)
+        )
+
         query_token = query_data.get("token")
 
+        externalUrl = (
+            f"{self.config.connect_uri}/{self.config.workspace}/datasets/{report_token}"
+            if is_mode_dataset
+            else f"{self.config.connect_uri}/{self.config.workspace}/reports/{report_token}/details/queries/{query_token}"
+        )
+
         dataset_props = DatasetPropertiesClass(
-            name=query_data.get("name"),
+            name=report_info.get("name") if is_mode_dataset else query_data.get("name"),
             description=f"""### Source Code
 ``` sql
 {query_data.get("raw_query")}
 ```
             """,
-            externalUrl=f"{self.config.connect_uri}/{self.config.workspace}/reports/{report_token}/details/queries/{query_token}",
+            externalUrl=externalUrl,
             customProperties=self.get_custom_props_from_dict(
                 query_data,
                 [
@@ -939,7 +966,22 @@ def construct_query_from_api_data(
             ).as_workunit()
         )
 
-        subtypes = SubTypesClass(typeNames=([BIAssetSubTypes.MODE_QUERY]))
+        if is_mode_dataset:
+            space_container_key = self.gen_space_key(space_token)
+            yield from add_dataset_to_container(
+                container_key=space_container_key,
+                dataset_urn=query_urn,
+            )
+
+        subtypes = SubTypesClass(
+            typeNames=(
+                [
+                    BIAssetSubTypes.MODE_DATASET
+                    if is_mode_dataset
+                    else BIAssetSubTypes.MODE_QUERY
+                ]
+            )
+        )
         yield (
             MetadataChangeProposalWrapper(
                 entityUrn=query_urn,
@@ -950,7 +992,9 @@ def construct_query_from_api_data(
         yield MetadataChangeProposalWrapper(
             entityUrn=query_urn,
             aspect=BrowsePathsV2Class(
-                path=self._browse_path_query(space_token, report_info)
+                path=self._browse_path_dashboard(space_token)
+                if is_mode_dataset
+                else self._browse_path_query(space_token, report_info)
             ),
         ).as_workunit()
 
@@ -958,7 +1002,6 @@ def construct_query_from_api_data(
             upstream_warehouse_platform,
             upstream_warehouse_db_name,
         ) = self._get_platform_and_dbname(query_data.get("data_source_id"))
-
         if upstream_warehouse_platform is None:
             # this means we can't infer the platform
             return
@@ -1022,7 +1065,7 @@ def construct_query_from_api_data(
         schema_fields = infer_output_schema(parsed_query_object)
         if schema_fields:
             schema_metadata = SchemaMetadataClass(
-                schemaName="mode_query",
+                schemaName="mode_dataset" if is_mode_dataset else "mode_query",
                 platform=f"urn:li:dataPlatform:{self.platform}",
                 version=0,
                 fields=schema_fields,
@@ -1040,7 +1083,7 @@ def construct_query_from_api_data(
             )
 
         yield from self.get_upstream_lineage_for_parsed_sql(
-            query_data, parsed_query_object
+            query_urn, query_data, parsed_query_object
         )
 
         operation = OperationClass(
@@ -1089,10 +1132,9 @@ def construct_query_from_api_data(
             ).as_workunit()
 
     def get_upstream_lineage_for_parsed_sql(
-        self, query_data: dict, parsed_query_object: SqlParsingResult
+        self, query_urn: str, query_data: dict, parsed_query_object: SqlParsingResult
     ) -> List[MetadataWorkUnit]:
         wu = []
-        query_urn = self.get_dataset_urn_from_query(query_data)
 
         if parsed_query_object is None:
             logger.info(
@@ -1350,6 +1392,24 @@ def _get_reports(self, space_token: str) -> List[dict]:
             )
         return reports
 
+    @lru_cache(maxsize=None)
+    def _get_datasets(self, space_token: str) -> List[dict]:
+        """
+        Return the dataset entries of a space, or [] after reporting an HTTP error.
+        """
+        endpoint = f"{self.workspace_uri}/spaces/{space_token}/datasets"
+        try:
+            payload = self._get_request_json(endpoint)
+        except HTTPError as http_error:
+            self.report.report_failure(
+                title="Failed to Retrieve Datasets for Space",
+                message=f"Unable to retrieve datasets for space token {space_token}.",
+                context=f"Error: {str(http_error)}",
+            )
+            return []
+        # Entries are nested under "_embedded" -> "reports" in the response.
+        return payload.get("_embedded", {}).get("reports", [])
+
     @lru_cache(maxsize=None)
     def _get_queries(self, report_token: str) -> list:
         queries = []
@@ -1523,24 +1583,14 @@ def emit_chart_mces(self) -> Iterable[MetadataWorkUnit]:
             for report in reports:
                 report_token = report.get("token", "")
 
-                if report.get("imported_datasets"):
-                    # The connector doesn't support imported datasets yet.
-                    # For now, we just keep this in the report to track what we're missing.
-                    imported_datasets = [
-                        imported_dataset.get("name") or str(imported_dataset)
-                        for imported_dataset in report["imported_datasets"]
-                    ]
-                    self.report.dropped_imported_datasets.setdefault(
-                        report_token, LossyList()
-                    ).extend(imported_datasets)
-
                 queries = self._get_queries(report_token)
                 for query in queries:
-                    query_mcps = self.construct_query_from_api_data(
+                    query_mcps = self.construct_query_or_dataset(
                         report_token,
                         query,
                         space_token=space_token,
                         report_info=report,
+                        is_mode_dataset=False,
                     )
                     chart_fields: Dict[str, SchemaFieldClass] = {}
                     for wu in query_mcps:
@@ -1566,6 +1616,27 @@ def emit_chart_mces(self) -> Iterable[MetadataWorkUnit]:
                             query_name=query["name"],
                         )
 
+    def emit_dataset_mces(self) -> Iterable[MetadataWorkUnit]:
+        """
+        Yield the work units describing the Mode datasets found in each space.
+
+        Every query of every dataset returned by `_get_datasets` is passed to
+        `construct_query_or_dataset` with `is_mode_dataset=True`.
+        """
+        # Only the space tokens are needed here, so iterate the mapping's keys.
+        for space_token in self.space_tokens:
+            for dataset in self._get_datasets(space_token):
+                # The dataset's token is used where `_get_queries` expects a report token.
+                dataset_token = dataset.get("token", "")
+                for query in self._get_queries(dataset_token):
+                    yield from self.construct_query_or_dataset(
+                        dataset_token,
+                        query,
+                        space_token=space_token,
+                        report_info=dataset,
+                        is_mode_dataset=True,
+                    )
+
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "ModeSource":
         config: ModeConfig = ModeConfig.parse_obj(config_dict)
@@ -1581,6 +1652,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         yield from self.emit_dashboard_mces()
+        yield from self.emit_dataset_mces()
         yield from self.emit_chart_mces()
 
     def get_report(self) -> SourceReport:
diff --git a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json
index 2fa9f4ee86a86..a6a685672bda0 100644
--- a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json
+++ b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json
@@ -132,8 +132,8 @@
         "json": {
             "timestampMillis": 1638860400000,
             "partitionSpec": {
-                "type": "FULL_TABLE",
-                "partition": "FULL_TABLE_SNAPSHOT"
+                "partition": "FULL_TABLE_SNAPSHOT",
+                "type": "FULL_TABLE"
             },
             "viewsCount": 6
         }
@@ -173,7 +173,9 @@
                         "charts": [
                             "urn:li:chart:(mode,f622b9ee725b)"
                         ],
-                        "datasets": [],
+                        "datasets": [
+                            "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)"
+                        ],
                         "lastModified": {
                             "created": {
                                 "time": 1639169724316,
@@ -243,6 +245,89 @@
         "lastRunId": "no-run-id-provided"
     }
 },
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "updated_at": "2024-09-02T07:40:44.046Z",
+                "last_run_id": "3535709679",
+                "data_source_id": "44763",
+                "report_imports_count": "2"
+            },
+            "externalUrl": "https://app.mode.com/acryl/datasets/24f66e1701b6",
+            "name": "Dataset 1",
+            "description": "### Source Code\n``` sql\n-- Returns first 100 rows from DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY\n  SELECT \n\t\tAGE,\n\t\tID,\n\t\tNAME,\n\t\t_FIVETRAN_DELETED,\n\t\t_FIVETRAN_SYNCED\n FROM DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY LIMIT 100;\n\n-- Returns first 100 rows from ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER\n  SELECT \n\t\tCOMMUNICATION_ACCOUNT_ID,\n\t\tID,\n\t\tMMS_CAPABLE,\n\t\tPHONE_NUMBER,\n\t\tSMS_CAPABLE,\n\t\tSTATUS,\n\t\tSTATUS_TLM,\n\t\tTLM,\n\t\tVOICE_CAPABLE,\n\t\tWHEN_CREATED\n FROM ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER LIMIT 100;\n \n \n```\n            ",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1638860400000,
+        "runId": "mode-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "container",
+    "aspect": {
+        "json": {
+            "container": "urn:li:container:800cfcb4cec6ad587cafde11a0b0bb4a"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1638860400000,
+        "runId": "mode-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "Dataset"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1638860400000,
+        "runId": "mode-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "browsePathsV2",
+    "aspect": {
+        "json": {
+            "path": [
+                {
+                    "id": "acryl"
+                },
+                {
+                    "id": "urn:li:container:800cfcb4cec6ad587cafde11a0b0bb4a",
+                    "urn": "urn:li:container:800cfcb4cec6ad587cafde11a0b0bb4a"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1638860400000,
+        "runId": "mode-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
 {
     "entityType": "dataset",
     "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD)",
@@ -643,8 +728,8 @@
         "json": {
             "timestampMillis": 1638860400000,
             "partitionSpec": {
-                "type": "FULL_TABLE",
-                "partition": "FULL_TABLE_SNAPSHOT"
+                "partition": "FULL_TABLE_SNAPSHOT",
+                "type": "FULL_TABLE"
             },
             "operationType": "UPDATE",
             "lastUpdatedTimestamp": 1639177973273
@@ -721,9 +806,9 @@
         "json": {
             "fields": [
                 {
-                    "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),payment_date)",
+                    "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),amount)",
                     "schemaField": {
-                        "fieldPath": "payment_date",
+                        "fieldPath": "amount",
                         "nullable": false,
                         "type": {
                             "type": {
@@ -743,9 +828,9 @@
                     }
                 },
                 {
-                    "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),amount)",
+                    "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),payment_date)",
                     "schemaField": {
-                        "fieldPath": "amount",
+                        "fieldPath": "payment_date",
                         "nullable": false,
                         "type": {
                             "type": {
@@ -943,6 +1028,22 @@
         "lastRunId": "no-run-id-provided"
     }
 },
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1638860400000,
+        "runId": "mode-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
 {
     "entityType": "query",
     "entityUrn": "urn:li:query:10149707.34499.1897576958",
diff --git a/metadata-ingestion/tests/integration/mode/setup/dataset_24f66e1701b6.json b/metadata-ingestion/tests/integration/mode/setup/dataset_24f66e1701b6.json
new file mode 100644
index 0000000000000..4e9cb911ab565
--- /dev/null
+++ b/metadata-ingestion/tests/integration/mode/setup/dataset_24f66e1701b6.json
@@ -0,0 +1,149 @@
+{
+    "token": "24f66e1701b6",
+    "id": 5450544,
+    "name": "Dataset 1",
+    "description": "",
+    "created_at": "2024-09-02T07:38:43.722Z",
+    "updated_at": "2024-09-02T07:40:44.026Z",
+    "published_at": null,
+    "edited_at": "2024-09-02T07:40:32.668Z",
+    "type": "DatasetReport",
+    "last_successful_sync_at": null,
+    "last_saved_at": "2024-09-02T07:40:32.679Z",
+    "archived": false,
+    "space_token": "75737b70402e",
+    "account_id": 751252,
+    "account_username": "acryltest",
+    "public": false,
+    "manual_run_disabled": false,
+    "drill_anywhere_enabled": false,
+    "run_privately": true,
+    "drilldowns_enabled": false,
+    "expected_runtime": 0.763795,
+    "last_successfully_run_at": "2024-09-02T07:40:44.009Z",
+    "last_run_at": "2024-09-02T07:40:43.185Z",
+    "last_successful_run_token": "29e56ca29a45",
+    "query_count": 1,
+    "max_query_count": 160,
+    "runs_count": 3,
+    "schedules_count": 0,
+    "query_preview": "-- Returns first 100 rows from DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY\n  SELECT \n\t\tAGE,\n\t\tID,\n\t\tNAME,\n\t\t_FIVETRAN_DELE",
+    "view_count": 6,
+    "thoughtspot_published_at": null,
+    "_links": {
+        "self": {
+            "href": "/api/acryltest/reports/24f66e1701b6"
+        },
+        "web": {
+            "href": "https://app.mode.com/acryltest/datasets/24f66e1701b6"
+        },
+        "web_edit": {
+            "href": "/editor/acryltest/datasets/24f66e1701b6"
+        },
+        "account": {
+            "href": "/api/acryltest"
+        },
+        "report_run": {
+            "templated": true,
+            "href": "/api/acryltest/reports/24f66e1701b6/runs/{id}?embed[result]=1"
+        },
+        "space": {
+            "href": "/api/acryltest/collections/75737b70402e"
+        },
+        "space_links": {
+            "href": "/api/acryltest/reports/24f66e1701b6/space_links"
+        },
+        "queries": {
+            "href": "/api/acryltest/reports/24f66e1701b6/queries"
+        },
+        "report_runs": {
+            "href": "/api/acryltest/reports/24f66e1701b6/runs"
+        },
+        "report_pins": {
+            "href": "/api/acryltest/reports/24f66e1701b6/pins"
+        },
+        "report_schedules": {
+            "href": "/api/acryltest/reports/24f66e1701b6/schedules"
+        },
+        "dataset_dependencies": {
+            "href": "/api/acryltest/datasets/24f66e1701b6/reports"
+        },
+        "last_run": {
+            "href": "/api/acryltest/reports/24f66e1701b6/runs/29e56ca29a45"
+        },
+        "last_successful_run": {
+            "href": "/api/acryltest/reports/24f66e1701b6/runs/29e56ca29a45"
+        },
+        "perspective_email_subscription_memberships": {
+            "href": "/api/acryltest/reports/24f66e1701b6/perspective_email_report_subscription_memberships"
+        },
+        "creator": {
+            "href": "/api/modeuser"
+        },
+        "report_index_web": {
+            "href": "/acryltest/spaces/75737b70402e"
+        }
+    },
+    "_forms": {
+        "edit": {
+            "method": "patch",
+            "action": "/api/acryltest/reports/24f66e1701b6",
+            "input": {
+                "report": {
+                    "name": {
+                        "type": "text",
+                        "value": "Dataset_2"
+                    },
+                    "description": {
+                        "type": "text",
+                        "value": ""
+                    },
+                    "account_id": {
+                        "type": "text",
+                        "value": 751252
+                    },
+                    "space_token": {
+                        "type": "text",
+                        "value": "75737b70402e"
+                    }
+                }
+            }
+        },
+        "destroy": {
+            "method": "delete",
+            "action": "/api/acryltest/reports/24f66e1701b6"
+        },
+        "archive": {
+            "method": "patch",
+            "action": "/api/acryltest/reports/24f66e1701b6/archive"
+        },
+        "unarchive": {
+            "method": "patch",
+            "action": "/api/acryltest/reports/24f66e1701b6/unarchive"
+        },
+        "update_settings": {
+            "method": "patch",
+            "action": "/api/acryltest/reports/24f66e1701b6/update_settings",
+            "input": {
+                "report": {
+                    "manual_run_disabled": {
+                        "type": "select",
+                        "options": [
+                            true,
+                            false
+                        ],
+                        "value": false
+                    },
+                    "drill_anywhere_enabled": {
+                        "type": "select",
+                        "options": [
+                            true,
+                            false
+                        ],
+                        "value": false
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/mode/setup/dataset_queries_24f66e1701b6.json b/metadata-ingestion/tests/integration/mode/setup/dataset_queries_24f66e1701b6.json
new file mode 100644
index 0000000000000..ba3be157786e6
--- /dev/null
+++ b/metadata-ingestion/tests/integration/mode/setup/dataset_queries_24f66e1701b6.json
@@ -0,0 +1,64 @@
+{
+    "_links": {
+        "self": {
+            "href": "/api/acryl/reports/24f66e1701b6/queries"
+        }
+    },
+    "_embedded": {
+        "queries": [
+            {
+                "id": 19780522,
+                "token": "9b2f34343531",
+                "raw_query": "-- Returns first 100 rows from DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY\n  SELECT \n\t\tAGE,\n\t\tID,\n\t\tNAME,\n\t\t_FIVETRAN_DELETED,\n\t\t_FIVETRAN_SYNCED\n FROM DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY LIMIT 100;\n\n-- Returns first 100 rows from ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER\n  SELECT \n\t\tCOMMUNICATION_ACCOUNT_ID,\n\t\tID,\n\t\tMMS_CAPABLE,\n\t\tPHONE_NUMBER,\n\t\tSMS_CAPABLE,\n\t\tSTATUS,\n\t\tSTATUS_TLM,\n\t\tTLM,\n\t\tVOICE_CAPABLE,\n\t\tWHEN_CREATED\n FROM ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER LIMIT 100;\n \n ",
+                "created_at": "2024-09-02T07:38:43.755Z",
+                "updated_at": "2024-09-02T07:40:44.046Z",
+                "name": "Query 1",
+                "last_run_id": 3535709679,
+                "data_source_id": 44763,
+                "explorations_count": 0,
+                "report_imports_count": 2,
+                "dbt_metric_id": null,
+                "_links": {
+                    "self": {
+                        "href": "/api/acryl/reports/24f66e1701b6/queries/9b2f34343531"
+                    },
+                    "report": {
+                        "href": "/api/acryl/reports/24f66e1701b6"
+                    },
+                    "report_runs": {
+                        "href": "/api/acryl/reports/24f66e1701b6/runs"
+                    },
+                    "query_runs": {
+                        "href": "/api/acryl/reports/24f66e1701b6/queries/9b2f34343531/runs"
+                    },
+                    "creator": {
+                        "href": "/api/modeuser"
+                    }
+                },
+                "_forms": {
+                    "edit": {
+                        "method": "patch",
+                        "action": "/api/acryl/reports/24f66e1701b6/queries/9b2f34343531",
+                        "content_type": "application/json",
+                        "input": {
+                            "query": {
+                                "raw_query": {
+                                    "type": "text",
+                                    "value": "-- Returns first 100 rows from DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY\n  SELECT \n\t\tAGE,\n\t\tID,\n\t\tNAME,\n\t\t_FIVETRAN_DELETED,\n\t\t_FIVETRAN_SYNCED\n FROM DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY LIMIT 100;\n\n-- Returns first 100 rows from ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER\n  SELECT \n\t\tCOMMUNICATION_ACCOUNT_ID,\n\t\tID,\n\t\tMMS_CAPABLE,\n\t\tPHONE_NUMBER,\n\t\tSMS_CAPABLE,\n\t\tSTATUS,\n\t\tSTATUS_TLM,\n\t\tTLM,\n\t\tVOICE_CAPABLE,\n\t\tWHEN_CREATED\n FROM ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER LIMIT 100;\n \n "
+                                },
+                                "name": {
+                                    "type": "text",
+                                    "value": "Query 1"
+                                },
+                                "data_source_id": {
+                                    "type": "text",
+                                    "value": 44763
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/mode/setup/datasets_157933cc1168.json b/metadata-ingestion/tests/integration/mode/setup/datasets_157933cc1168.json
new file mode 100644
index 0000000000000..4ca48a84e9110
--- /dev/null
+++ b/metadata-ingestion/tests/integration/mode/setup/datasets_157933cc1168.json
@@ -0,0 +1,10 @@
+{
+  "_links": {
+    "self": {
+      "href": "/api/acryltest/collections/157933cc1168/reports"
+    }
+  },
+  "_embedded": {
+    "reports": []
+  }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/mode/setup/datasets_75737b70402e.json b/metadata-ingestion/tests/integration/mode/setup/datasets_75737b70402e.json
new file mode 100644
index 0000000000000..ffb1bbf521db7
--- /dev/null
+++ b/metadata-ingestion/tests/integration/mode/setup/datasets_75737b70402e.json
@@ -0,0 +1,149 @@
+{
+  "_links": {
+    "self": {
+      "href": "/api/acryltest/collections/75737b70402e/reports"
+    }
+  },
+  "_embedded": {
+    "reports": [
+      {
+        "account_id": 751252,
+        "account_username": "acryltest",
+        "collection_name": "AcrylTest",
+        "collection_token": "75737b70402e",
+        "created_at": "2024-09-02T07:38:43.722Z",
+        "description": "",
+        "drilldowns_enabled": false,
+        "edited_at": "2024-09-02T07:40:32.668Z",
+        "id": 5450544,
+        "is_sample": false,
+        "last_run_at": "2024-09-02T07:40:43.185Z",
+        "last_saved_at": "2024-09-02T07:40:32.679Z",
+        "last_successful_run_token": "29e56ca29a45",
+        "last_successful_sync_at": null,
+        "last_successfully_run_at": "2024-09-02T07:40:44.009Z",
+        "manual_run_disabled": false,
+        "max_query_count": 1,
+        "name": "Dataset 1",
+        "public": false,
+        "query_count": 1,
+        "query_preview": "-- Returns first 100 rows from DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY\n  SELECT \n\t\tAGE,\n\t\tID,\n\t\tNAME,\n\t\t_FIVETRAN_DELE",
+        "run_privately": true,
+        "runs_count": 3,
+        "schedules_count": 0,
+        "space_token": "75737b70402e",
+        "switch_view_token": "f213a1bb8f8a",
+        "token": "24f66e1701b6",
+        "type": "DatasetReport",
+        "updated_at": "2024-09-02T07:40:44.026Z",
+        "view_count": 6,
+        "thoughtspot_published_at": null,
+        "_links": {
+            "account": {
+                "href": "/api/acryltest"
+            },
+            "creator": {
+                "href": "/api/modeuser"
+            },
+            "dataset_dependencies": {
+                "href": "/api/acryltest/datasets/24f66e1701b6/reports"
+            },
+            "last_run": {
+                "href": "/api/acryltest/reports/24f66e1701b6/runs/29e56ca29a45"
+            },
+            "last_successful_run": {
+                "href": "/api/acryltest/reports/24f66e1701b6/runs/29e56ca29a45"
+            },
+            "queries": {
+                "href": "/api/acryltest/reports/24f66e1701b6/queries"
+            },
+            "report_index_web": {
+                "href": "/acryltest/spaces/75737b70402e"
+            },
+            "report_pins": {
+                "href": "/api/acryltest/reports/24f66e1701b6/pins"
+            },
+            "report_run": {
+                "templated": true,
+                "href": "/api/acryltest/reports/24f66e1701b6/runs/{id}?embed[result]=1"
+            },
+            "report_runs": {
+                "href": "/api/acryltest/reports/24f66e1701b6/runs"
+            },
+            "report_schedules": {
+                "href": "/api/acryltest/reports/24f66e1701b6/schedules"
+            },
+            "self": {
+                "href": "/api/acryltest/reports/24f66e1701b6"
+            },
+            "space": {
+                "href": "/api/acryltest/collections/75737b70402e"
+            },
+            "space_links": {
+                "href": "/api/acryltest/reports/24f66e1701b6/space_links"
+            },
+            "web": {
+                "href": "https://app.mode.com/acryltest/datasets/24f66e1701b6"
+            },
+            "web_edit": {
+                "href": "/editor/acryltest/datasets/24f66e1701b6"
+            }
+        },
+        "_forms": {
+            "destroy": {
+                "method": "delete",
+                "action": "/api/acryltest/reports/24f66e1701b6"
+            },
+            "edit": {
+                "method": "patch",
+                "action": "/api/acryltest/reports/24f66e1701b6",
+                "input": {
+                    "report": {
+                        "name": {
+                            "type": "text",
+                            "value": "Dataset_2"
+                        },
+                        "description": {
+                            "type": "text",
+                            "value": ""
+                        },
+                        "account_id": {
+                            "type": "text",
+                            "value": 751252
+                        },
+                        "space_token": {
+                            "type": "text",
+                            "value": "75737b70402e"
+                        }
+                    }
+                }
+            },
+            "update_settings": {
+                "method": "patch",
+                "action": "/api/acryltest/reports/24f66e1701b6/update_settings",
+                "input": {
+                    "report": {
+                        "manual_run_disabled": {
+                            "type": "select",
+                            "options": [
+                                true,
+                                false
+                            ],
+                            "value": false
+                        },
+                        "drill_anywhere_enabled": {
+                            "type": "select",
+                            "options": [
+                                true,
+                                false
+                            ],
+                            "value": false
+                        }
+                    }
+                }
+            }
+        }
+     }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/mode/setup/reports_75737b70402e.json b/metadata-ingestion/tests/integration/mode/setup/reports_75737b70402e.json
index 9718967e5e463..956093a95d849 100644
--- a/metadata-ingestion/tests/integration/mode/setup/reports_75737b70402e.json
+++ b/metadata-ingestion/tests/integration/mode/setup/reports_75737b70402e.json
@@ -221,7 +221,34 @@
             }
           }
         }
-      }
+      },
+      "imported_datasets": [
+        {
+            "name": "Dataset 1",
+            "token": "24f66e1701b6",
+            "_links": {
+                "report": {
+                    "href": "/api/acryltest/reports/94750a190dc8"
+                },
+                "source_dataset": {
+                    "href": "/api/acryltest/reports/24f66e1701b6"
+                }
+            },
+            "_forms": {
+                "refresh": {
+                    "method": "post",
+                    "action": "/api/acryltest/reports/94750a190dc8/runs",
+                    "input": {
+                        "dataset_tokens": [
+                            {
+                                "token": "24f66e1701b6"
+                            }
+                        ]
+                    }
+                }
+            }
+        }
+    ]
     }]
   }
 }
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/mode/test_mode.py b/metadata-ingestion/tests/integration/mode/test_mode.py
index 0346767b05d25..7ea6597460de2 100644
--- a/metadata-ingestion/tests/integration/mode/test_mode.py
+++ b/metadata-ingestion/tests/integration/mode/test_mode.py
@@ -22,6 +22,10 @@
     "https://app.mode.com/api/acryl/reports/9d2da37fa91e/queries/6e26a9f3d4e2/charts": "charts.json",
     "https://app.mode.com/api/acryl/data_sources": "data_sources.json",
     "https://app.mode.com/api/acryl/definitions": "definitions.json",
+    "https://app.mode.com/api/acryl/spaces/157933cc1168/datasets": "datasets_157933cc1168.json",
+    "https://app.mode.com/api/acryl/spaces/75737b70402e/datasets": "datasets_75737b70402e.json",
+    "https://app.mode.com/api/acryl/reports/24f66e1701b6": "dataset_24f66e1701b6.json",
+    "https://app.mode.com/api/acryl/reports/24f66e1701b6/queries": "dataset_queries_24f66e1701b6.json",
 }
 
 RESPONSE_ERROR_LIST = ["https://app.mode.com/api/acryl/spaces/75737b70402e/reports"]