From 0b7661f53f58a5a8923e7646bb34f8b16296ad74 Mon Sep 17 00:00:00 2001 From: Amadej Kastelic Date: Fri, 29 Nov 2024 15:31:32 +0100 Subject: [PATCH] Linkedin integration impl (#28) * Linkedin integration impl * remove comments --- README.md | 1 + bot/common/utils.py | 26 ++++++- bot/constants.py | 1 + bot/domain/post_format.py | 8 ++ bot/integrations/instagram/aiograpi/client.py | 8 +- bot/integrations/linkedin/__init__.py | 0 bot/integrations/linkedin/client.py | 74 +++++++++++++++++++ bot/integrations/linkedin/config.py | 7 ++ bot/integrations/registry.py | 2 + docker-compose.yml | 2 +- 10 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 bot/integrations/linkedin/__init__.py create mode 100644 bot/integrations/linkedin/client.py create mode 100644 bot/integrations/linkedin/config.py diff --git a/README.md b/README.md index ad28e4d..2325c49 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ A Discord bot that automatically embeds media and metadata of messages containin - Threads ✅ - 24ur.com ✅ - 4chan ✅ +- Linkedin ✅ ## How to run - Build the docker image: `docker build . 
def parse_relative_time(relative_time: str) -> datetime.timedelta:
    """
    Convert a compact relative-time label (e.g. ``'3d'``, ``'2mo'``, ``'1y'``) to a timedelta.

    The label is stripped and lower-cased; all digit characters form the amount and all
    alphabetic characters form the unit.

    Supported units: ``y``, ``mo``, ``w``, ``d``, ``h``, ``m``, ``s``.
    ``datetime.timedelta`` has no calendar-aware units, so months and years are
    approximated as 30 and 365 days respectively.

    :param relative_time: label such as ``'5h'`` or ``' 2W '``
    :return: the corresponding (possibly approximate) duration
    :raises ValueError: if the label has no numeric amount or the unit is not supported
    """
    # Each unit maps to a (timedelta keyword, multiplier) pair. 'years' and 'months' are
    # not valid timedelta keyword arguments, so they are expressed as a number of days.
    units = {
        'y': ('days', 365),
        'mo': ('days', 30),
        'w': ('weeks', 1),
        'd': ('days', 1),
        'h': ('hours', 1),
        'm': ('minutes', 1),
        's': ('seconds', 1),
    }

    relative_time = relative_time.strip().lower()

    # Extract the number and unit
    digits = ''.join(ch for ch in relative_time if ch.isdigit())
    unit = ''.join(ch for ch in relative_time if ch.isalpha())

    if not digits:
        raise ValueError(f"No numeric amount in relative time: {relative_time!r}")

    if unit not in units:
        raise ValueError(f"Unsupported time unit: {unit}")

    # Return the appropriate timedelta
    keyword, multiplier = units[unit]
    return datetime.timedelta(**{keyword: int(digits) * multiplier})
constants.Integration.INSTAGRAM: INSTAGRAM_POST_FORMAT, + constants.Integration.LINKEDIN: LINKEDIN_POST_FORMAT, constants.Integration.REDDIT: REDDIT_POST_FORMAT, constants.Integration.THREADS: THREADS_POST_FORMAT, constants.Integration.TIKTOK: TIKTOK_POST_FORMAT, diff --git a/bot/integrations/instagram/aiograpi/client.py b/bot/integrations/instagram/aiograpi/client.py index a253aeb..3a650e1 100644 --- a/bot/integrations/instagram/aiograpi/client.py +++ b/bot/integrations/instagram/aiograpi/client.py @@ -44,12 +44,14 @@ async def get_integration_data( async def get_post(self, url: str) -> domain.Post: try: return await self._get_post(url) - except (aiograpi_exceptions.PreLoginRequired, aiograpi_exceptions.ClientLoginRequired): + except ( + aiograpi_exceptions.PreLoginRequired, + aiograpi_exceptions.ClientLoginRequired, + aiograpi_exceptions.ReloginAttemptExceeded, + ): await self.login() except aiograpi_exceptions.LoginRequired: await self.login(relogin=True) - except aiograpi_exceptions.ReloginAttemptExceeded: - await self.login() return await self._get_post(url) diff --git a/bot/integrations/linkedin/__init__.py b/bot/integrations/linkedin/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bot/integrations/linkedin/client.py b/bot/integrations/linkedin/client.py new file mode 100644 index 0000000..170364e --- /dev/null +++ b/bot/integrations/linkedin/client.py @@ -0,0 +1,74 @@ +import datetime +import typing + +from django.conf import settings +from playwright.async_api import async_playwright + +from bot import constants +from bot import domain +from bot import logger +from bot.common import utils +from bot.integrations import base +from bot.integrations.linkedin import config + + +class LinkedinClientSingleton(base.BaseClientSingleton): + DOMAINS = ['linkedin.com/posts', 'linkedin.com/feed'] + _CONFIG_SCHEMA = config.LinkedinConfig + + @classmethod + def _create_instance(cls) -> None: + conf: config.LinkedinConfig = 
class LinkedinClient(base.BaseClient):
    """
    Linkedin integration client that reads a post page with headless Chromium (Playwright).
    """

    INTEGRATION = constants.Integration.LINKEDIN

    async def get_integration_data(self, url: str) -> typing.Tuple[constants.Integration, str, typing.Optional[int]]:
        """
        Derive the post identifier from the last path segment of ``url``.

        :param url: Linkedin post or feed URL
        :return: tuple of (integration, identifier, None)
        """
        # Remove the query string BEFORE trimming slashes; otherwise a URL such as
        # '.../posts/slug-123-abc/?utm=x' leaves an empty last path segment.
        id_part = url.split('?')[0].strip('/').split('/')[-1]

        # Colon-separated segment (urn-style): the identifier is the last component
        if ':' in id_part:
            return self.INTEGRATION, id_part.split(':')[-1], None

        # Hyphen-separated slug: the identifier is the second-to-last token
        slug_parts = id_part.split('-')
        if len(slug_parts) < 2:
            # No hyphen to split on; use the whole segment instead of raising IndexError
            return self.INTEGRATION, id_part, None
        return self.INTEGRATION, slug_parts[-2], None

    async def get_post(self, url: str) -> domain.Post:
        """
        Open ``url`` in headless Chromium and build a post from the rendered page.

        Reads author, description, reaction count and relative publish time from the page,
        then downloads the ``og:video`` media if present, falling back to ``og:image``.

        :param url: Linkedin post URL
        :return: populated post; ``buffer`` is set only when a media URL was found
        """
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                context = await browser.new_context()

                page = await context.new_page()
                await page.goto(url)

                author = await page.locator('[data-tracking-control-name="public_post_feed-actor-name"]').inner_text()
                description = await page.locator('[data-test-id="main-feed-activity-card__commentary"]').inner_text()
                likes_text = await page.locator('[data-test-id="social-actions__reaction-count"]').inner_text()
                # Drop thousands separators before converting; empty text counts as 0
                likes = int(likes_text.replace(',', '').replace('.', '') or 0)
                relative_time = await page.locator('time').inner_text()

                post = domain.Post(
                    url=url,
                    author=author.strip(),
                    description=description.strip(),
                    likes=likes,
                    # The page only exposes a relative label, so the timestamp is approximate
                    created=datetime.datetime.now() - utils.parse_relative_time(relative_time),
                )

                # Prefer video media; fall back to the preview image
                video_locator = page.locator('meta[property="og:video"]')
                video_count = await video_locator.count()
                media_url = await video_locator.get_attribute('content') if video_count > 0 else None
                if not media_url:
                    image_locator = page.locator('meta[property="og:image"]')
                    image_count = await image_locator.count()
                    media_url = await image_locator.get_attribute('content') if image_count > 0 else None

                if media_url:
                    post.buffer = await self._download(media_url)

                return post
            finally:
                # Close the browser even when navigation or a locator lookup fails
                await browser.close()