diff --git a/twitter_video_tools/twitter_crawler.py b/twitter_video_tools/twitter_crawler.py index 92ab43a..8db6b8b 100644 --- a/twitter_video_tools/twitter_crawler.py +++ b/twitter_video_tools/twitter_crawler.py @@ -29,15 +29,15 @@ def login(self, username: str, password: str, timeout: Optional[float] = 10000) self.page.get_by_label('Password').press('Enter') self.page.wait_for_url('https://twitter.com/home') - def get_all_liked_tweets(self, username: str, scroll_timeout: float = 0.8) -> list[str]: + def get_all_liked_video_tweets(self, username: str, scroll_timeout: float = 0.8) -> list[str]: """Get the username's all liked tweets Returns the list of links of liked tweets """ - return self.get_liked_tweets_until( + return self.get_liked_video_tweets_until( username, 'nothing', scroll_timeout ) # 'nothing' was intended because the given `until_link` would be never found on the links list - def get_liked_tweets_until(self, username: str, until_link: str, scroll_timeout: float = 0.8) -> list[str]: + def get_liked_video_tweets_until(self, username: str, until_link: str, scroll_timeout: float = 0.8) -> list[str]: """Scrolling down the list of liked tweets until the given `until_link` found Returns the list of links of liked tweets """ @@ -57,7 +57,7 @@ def get_liked_tweets_until(self, username: str, until_link: str, scroll_timeout: break previous_height = self.page_current_height - new_links = self._get_article_links_in_current_screen() + new_links = self._get_video_tweets_in_current_screen() links.extend(new_links) links = list(set(links)) @@ -70,7 +70,7 @@ def get_liked_tweets_until(self, username: str, until_link: str, scroll_timeout: def get_recent_liked_tweet(self, username: str) -> str: self._goto_liked_tweets(username) - return self._get_article_links_in_current_screen()[0] + return self._get_tweets_in_current_screen()[0] def get_video_of_tweet(self, link: str, timeout: Optional[float] = 10000) -> list[tuple[str, str]]: video_links: list[str] = [] @@ -97,7 
+97,26 @@ def _goto_liked_tweets(self, username: str) -> None: self.page.goto(f'https://twitter.com/{username}/likes') self.page.wait_for_selector('article') - def _get_article_links_in_current_screen(self) -> list[str]: + def _get_video_tweets_in_current_screen(self) -> list[str]: + links: list[str] = [] + + while True: + articles = self.page.locator('article:has(video)') + article_length = articles.count() + try: + links = [ + 'https://twitter.com' + + (articles.nth(i).locator('div').locator('a').nth(3).get_attribute('href', timeout=500) or '') + for i in range(article_length) + ] + break + except Error: # if articles in the page are not reachable + self.page.mouse.wheel(0, 500) # scrolling down to refresh the articles + self.page.mouse.wheel(0, -500) # scrolling back up to return to the original position + + return links + + def _get_tweets_in_current_screen(self) -> list[str]: links: list[str] = [] while True: