[pull] master from datahub-project:master #383

Merged 1 commit on Sep 9, 2024
metadata-ingestion/setup.py (2 changes: 1 addition & 1 deletion)
@@ -722,7 +722,7 @@
 "snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource",
 "snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource",
 "superset = datahub.ingestion.source.superset:SupersetSource",
- "tableau = datahub.ingestion.source.tableau:TableauSource",
+ "tableau = datahub.ingestion.source.tableau.tableau:TableauSource",
"openapi = datahub.ingestion.source.openapi:OpenApiSource",
"metabase = datahub.ingestion.source.metabase:MetabaseSource",
"teradata = datahub.ingestion.source.sql.teradata:TeradataSource",
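The only setup.py change is the extra path segment: the tableau module now lives inside a tableau package. For context, a minimal sketch of how an installed entry point like this gets resolved at runtime; this is not DataHub's actual registry code, the group name is assumed from the entries shown above, and the snippet uses the Python 3.10+ importlib.metadata API:

from importlib.metadata import entry_points

def load_source_class(source_type: str):
    # Scan the assumed "datahub.ingestion.source" entry-point group.
    for ep in entry_points(group="datahub.ingestion.source"):
        if ep.name == source_type:
            # For "tableau" this now imports the module
            # datahub.ingestion.source.tableau.tableau and returns
            # its TableauSource attribute, so only the entry-point
            # string above needs to change when the module moves.
            return ep.load()
    raise KeyError(f"unknown source type: {source_type}")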
Empty file.

Large diffs are not rendered by default.

@@ -10,7 +10,7 @@

import datahub.emitter.mce_builder as builder
from datahub.configuration.common import ConfigModel
- from datahub.ingestion.source import tableau_constant as c
+ from datahub.ingestion.source.tableau import tableau_constant as c
from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
DatasetLineageType,
FineGrainedLineage,
@@ -223,19 +223,19 @@ class MetadataQueryException(Exception):
description
isHidden
folderName
- upstreamFields {
-     name
-     datasource {
-         id
-     }
- }
- upstreamColumns {
-     name
-     table {
-         __typename
-         id
-     }
- }
+ # upstreamFields {
+ #     name
+ #     datasource {
+ #         id
+ #     }
+ # }
+ # upstreamColumns {
+ #     name
+ #     table {
+ #         __typename
+ #         id
+ #     }
+ # }
... on ColumnField {
dataCategory
role
@@ -336,6 +336,26 @@ class MetadataQueryException(Exception):
}
"""


+ datasource_upstream_fields_graphql_query = """
+ {
+     id
+     upstreamFields {
+         name
+         datasource {
+             id
+         }
+     }
+     upstreamColumns {
+         name
+         table {
+             __typename
+             id
+         }
+     }
+ }
+ """

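The upstream lineage selections commented out of the queries above are not dropped; they move into this standalone query, so upstreamFields and upstreamColumns can be fetched in a separate, smaller request per batch of fields. A hypothetical call, pairing it with the cursor-based helper added further down in this diff; the connection name matches the new FIELDS_CONNECTION constant, and the filter key is illustrative only:

data = query_metadata_cursor_based_pagination(
    server=server,
    main_query=datasource_upstream_fields_graphql_query,
    connection_name="fieldsConnection",
    first=100,
    after=None,
    qry_filter='datasourceId: "<datasource-id>"',  # illustrative filter
)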
published_datasource_graphql_query = """
{
__typename
@@ -368,19 +388,19 @@ class MetadataQueryException(Exception):
description
isHidden
folderName
- upstreamFields {
-     name
-     datasource {
-         id
-     }
- }
- upstreamColumns {
-     name
-     table {
-         __typename
-         id
-     }
- }
+ # upstreamFields {
+ #     name
+ #     datasource {
+ #         id
+ #     }
+ # }
+ # upstreamColumns {
+ #     name
+ #     table {
+ #         __typename
+ #         id
+ #     }
+ # }
... on ColumnField {
dataCategory
role
@@ -910,40 +930,46 @@ def make_filter(filter_dict: dict) -> str:
return filter


- def query_metadata(
+ def query_metadata_cursor_based_pagination(
server: Server,
main_query: str,
connection_name: str,
first: int,
-     offset: int,
+     after: Optional[str],
qry_filter: str = "",
) -> dict:
-     query = """{{
-         {connection_name} (first:{first}, offset:{offset}, filter:{{{filter}}})
-         {{
-             nodes {main_query}
-             pageInfo {{
-                 hasNextPage
-                 endCursor
+     query = f"""
+     query GetItems(
+         $first: Int,
+         $after: String
+     ) {{
+         {connection_name} ( first: $first, after: $after, filter:{{ {qry_filter} }})
+         {{
+             nodes {main_query}
+             pageInfo {{
+                 hasNextPage
+                 endCursor
              }}
          }}
          totalCount
      }}
-     }}""".format(
-         connection_name=connection_name,
-         first=first,
-         offset=offset,
-         filter=qry_filter,
-         main_query=main_query,
+     }}"""  # {{ escapes the { character in the f-string

+     result = server.metadata.query(
+         query=query,
+         variables={
+             "first": first,
+             "after": after,
+         },
      )
-     return server.metadata.query(query)

+     return result
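Not shown in this hunk is how a caller consumes the new shape. A minimal sketch of the cursor loop, assuming the metadata API response nests results under a top-level "data" key keyed by the connection name, as the query above requests:

def fetch_all_nodes(server, main_query, connection_name, page_size, qry_filter=""):
    nodes, cursor = [], None
    while True:
        response = query_metadata_cursor_based_pagination(
            server, main_query, connection_name,
            first=page_size, after=cursor, qry_filter=qry_filter,
        )
        connection = response["data"][connection_name]
        nodes.extend(connection["nodes"])
        page_info = connection["pageInfo"]
        if not page_info["hasNextPage"]:
            return nodes
        # Resume the next request where this page ended.
        cursor = page_info["endCursor"]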


def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
filter_pages = [query_filter]
-     # If this is primary id filter so we can use divide this query list into
+     # If this is a primary id filter, we can divide this query into
      # multiple requests each with smaller filter list (of order page_size).
-     # It is observed in the past that if list of primary ids grow beyond
-     # a few ten thousands then tableau server responds with empty response
+     # It has been observed that if the list of primary ids grows beyond
+     # a few tens of thousands, the tableau server responds with an empty response
# causing below error:
# tableauserverclient.server.endpoint.exceptions.NonXMLResponseError: b''
if (
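A sketch of the splitting the comment above describes, assuming the filter is a single-key dict such as {"idWithin": [...]}; the key and helper name here are illustrative, not the code hidden behind this hunk:

from typing import List

def split_primary_id_filter(query_filter: dict, page_size: int) -> List[dict]:
    key, ids = next(iter(query_filter.items()))
    # One filter page per page_size-sized slice of the id list, so no
    # single request carries tens of thousands of ids.
    return [
        {key: ids[start : start + page_size]}
        for start in range(0, len(ids), page_size)
    ]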
@@ -66,6 +66,7 @@
EXTRACT_LAST_INCREMENTAL_UPDATE_TIME = "extractLastIncrementalUpdateTime"
EXTRACT_LAST_UPDATE_TIME = "extractLastUpdateTime"
PUBLISHED_DATA_SOURCES_CONNECTION = "publishedDatasourcesConnection"
+ FIELDS_CONNECTION = "fieldsConnection"
DATA_SOURCE_FIELDS = "datasourceFields"
SHEETS_CONNECTION = "sheetsConnection"
CREATED_AT = "createdAt"
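Like its siblings, the new constant replaces a hard-coded response key at call sites. An illustrative lookup, using the import path this PR introduces; the exact response handling is an assumption:

from datahub.ingestion.source.tableau import tableau_constant as c

field_nodes = response["data"][c.FIELDS_CONNECTION]["nodes"]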