diff --git a/README.md b/README.md index 5a3df41..00ae2ea 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ pipx install git+https://github.com/gthsheep/tap-tiktok `access_token` - Access Token for the API as obtained via the authentication process described below. `advertiser_id` - Advertiser ID for your TikTok account. `start_date` - Start date as of when to start collecting metrics, e.g. `2022-01-01T00:00:00Z` +`lookback` - Number of days prior to the current date for which data should be refetched (default `0`) A full list of supported settings and capabilities for this tap is available by running: diff --git a/tap_tiktok/streams.py b/tap_tiktok/streams.py index 76838e4..743ce48 100644 --- a/tap_tiktok/streams.py +++ b/tap_tiktok/streams.py @@ -2,6 +2,7 @@ import copy import json import datetime +import dateutil import requests from typing import Any, Dict, Iterable, Optional from urllib.parse import urlparse @@ -255,6 +256,17 @@ def get_url_params( start_date = datetime.datetime.strptime(next_page_token["start_date"], DATE_FORMAT) else: start_date = self.get_starting_timestamp(context) + + # picking up where we left off on the last run (or first run), adjust for lookback if set + lookback_window = self.config["lookback"] + if lookback_window > 0: + # if lookback is configured, we want to refetch data for the entire lookback window + # (or as far back as the configured start date, whichever is the most recent date) + start_date = max( + min(start_date, datetime.datetime.now(tz=start_date.tzinfo) - datetime.timedelta(days=lookback_window)), + dateutil.parser.isoparse(self.config["start_date"]), + ) + yesterday = datetime.datetime.now(tz=start_date.tzinfo) - datetime.timedelta(days=1) end_date = min(start_date + datetime.timedelta(days=STEP_NUM_DAYS), yesterday) params: dict = { diff --git a/tap_tiktok/tap.py b/tap_tiktok/tap.py index 8bf8de0..0bc9576 100644 --- a/tap_tiktok/tap.py +++ b/tap_tiktok/tap.py @@ -74,6 +74,12 @@ class TapTikTok(Tap): th.BooleanType, default=True, description="If true then deleted status entities will also be returned" + ), + th.Property( + "lookback", + th.IntegerType, + default=0, + description="The number of days of data to reload from the current date (ignored if current state of the extractor has a start date earlier than the current date minus number of lookback days)" ) ).to_dict()