Skip to content

Commit

Permalink
Merge pull request #32 from r-anime/participation_script
Browse files Browse the repository at this point in the history
Add participation check script
  • Loading branch information
durinthal authored Dec 23, 2023
2 parents aa41f3d + 9c14485 commit ab7eaa8
Show file tree
Hide file tree
Showing 9 changed files with 161 additions and 21 deletions.
2 changes: 1 addition & 1 deletion scripts/frontpage.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def _format_line(submission, position, rank_change, total_hours):

line += f" {submission.score:>5}"

line += " {:>24}".format(f"[{submission.link_flair_text}]({submission.id})")
line += " {:>25}".format(f"[{submission.link_flair_text}]({submission.id})")

line += f" <{submission.author.name}>" if submission.author is not None else " <[deleted]>"

Expand Down
20 changes: 15 additions & 5 deletions scripts/modapps.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,13 @@ def process_row(row, activity_start_date, activity_end_date):
# and overall history on /r/anime.
activity_start_time_str = activity_start_date.isoformat()
activity_end_time_str = activity_end_date.isoformat()
user_comments_window = len(
user_comments_window_with_cdf = len(
comment_service.get_comments_by_username(username, activity_start_time_str, activity_end_time_str)
)
user_comments_window = len(
comment_service.get_comments_by_username(username, activity_start_time_str, activity_end_time_str, True)
)
cdf_window = user_comments_window_with_cdf - user_comments_window
user_posts_window = len(
post_service.get_posts_by_username(username, activity_start_time_str, activity_end_time_str)
)
Expand All @@ -59,14 +63,20 @@ def process_row(row, activity_start_date, activity_end_date):
mod_actions[mod_action.action].append(mod_action)
mod_actions_str = ", ".join(f"{action} ({len(action_list)})" for action, action_list in mod_actions.items())

user_comments_total = len(comment_service.get_comments_by_username(username, "2021-06-01"))
user_posts_total = len(post_service.get_posts_by_username(username, "2021-06-01"))
user_comments_total_with_cdf = len(comment_service.get_comments_by_username(username, "2020-01-01"))
user_comments_total = len(comment_service.get_comments_by_username(username, "2020-01-01", exclude_cdf=True))
cdf_total = user_comments_total_with_cdf - user_comments_total
user_posts_total = len(post_service.get_posts_by_username(username, "2020-01-01"))

passes_activity_threshold = "✅" if user_comments_window + user_posts_window > 50 else "❌"

response_body += f"### Activity in past 90 days {passes_activity_threshold}\n\n"
response_body += f"> Comments: {user_comments_window} ({user_comments_total} since 2021-06-01)"
response_body += f" Submissions: {user_posts_window} ({user_posts_total} since 2021-06-01)\n\n"
response_body += f"> Comments excluding CDF: {user_comments_window} ({user_comments_total} since 2020-01-01)"
if cdf_window or cdf_total:
response_body += f" (including CDF: {cdf_window}, {cdf_total} since 2020-01-01)"
else:
response_body += " (no CDF activity)"
response_body += f" Submissions: {user_posts_window} ({user_posts_total} since 2020-01-01)\n\n"
response_body += f"> Mod actions since 2021-01-01: {mod_actions_str}\n\n"

redditor = reddit.redditor(username)
Expand Down
94 changes: 94 additions & 0 deletions scripts/participation_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
Looks for users that commented in a minimum percentage of threads in the list. Use with -h for instructions.
"""

import argparse
from collections import Counter
from datetime import timedelta

from data.post_data import PostModel
from services import post_service, comment_service
from utils import reddit as reddit_utils
from utils.logger import logger


def get_post_users(post: PostModel, max_time_after: timedelta):
    """Return the set of usernames that commented on *post* within the allowed window.

    Comments from deleted/unknown authors are ignored, as are comments created
    more than *max_time_after* after the post itself.
    """
    cutoff = post.created_time + max_time_after
    authors = set()
    for comment in comment_service.get_comments_by_post_id(post.id36):
        # Deleted/unknown users don't count.
        if not comment.author:
            continue
        # Only comments inside the participation window count.
        if comment.created_time <= cutoff:
            authors.add(comment.author)
    return authors


def load_post_list(post_list: list[str]) -> list[str]:
    """Parse a list of post URLs into base-36 post IDs.

    :param post_list: lines from the input file, one post URL per line.
    :return: post IDs extracted from the recognized URLs, in input order.

    Lines that don't look like a Reddit post URL are skipped with a warning;
    the previous version dropped them silently, which made typos in the input
    file impossible to notice.
    """
    post_id_list = []
    for post_url in post_list:
        parsed_post_id = reddit_utils.POST_ID_REGEX.match(post_url)
        if not parsed_post_id:
            # Blank lines are expected (trailing newline etc.); anything else
            # is probably a typo the operator should know about.
            if post_url:
                logger.warning(f"Unrecognized post URL, skipping: {post_url}")
            continue

        post_id_list.append(parsed_post_id.groupdict().get("id"))
    return post_id_list


def main(post_list: list[str], min_percentage: float, max_time_after: timedelta):
    """Log every user who commented in at least *min_percentage* of the given threads.

    :param post_list: post URLs to check (unparseable entries are skipped).
    :param min_percentage: minimum participation percentage (0-100) to be listed.
    :param max_time_after: how long after each thread's creation comments still count.
    """
    post_id_list = load_post_list(post_list)
    post_with_users = {}
    for post_id in post_id_list:
        post = post_service.get_post_by_id(post_id)
        if not post:
            logger.warning(f"Post with ID {post_id} not found")
            continue

        user_set = get_post_users(post, max_time_after)
        # The thread author counts as a participant too; guard against
        # deleted/unknown authors so None doesn't show up as a "user".
        if post.author:
            user_set.add(post.author)
        post_with_users[post] = list(user_set)

    post_count = len(post_with_users)
    # Avoid ZeroDivisionError when no post in the list could be resolved.
    if not post_count:
        logger.warning("No posts found, nothing to check")
        return

    user_counts = Counter(username for user_list in post_with_users.values() for username in user_list)
    for username, user_count in user_counts.most_common():
        # Scale before rounding: the old int(round(ratio, 2) * 100) truncated
        # through float error, e.g. 29/100 -> 0.29 -> 28.999... -> 28.
        user_percentage = round(100 * user_count / post_count)
        if user_percentage < min_percentage:
            break
        logger.info(f"{username:>22}: {user_percentage:>3}% ({user_count:3} / {post_count:<3})")


def _get_parser() -> argparse.ArgumentParser:
new_parser = argparse.ArgumentParser(description="Check users who commented on a post.")
new_parser.add_argument(
"-f", "--file", action="store", required=True, help="File path to list of post URLs to check."
)
new_parser.add_argument(
"-p",
"--percentage",
action="store",
type=int,
default=80,
help="Minimum percentage of threads required (default 80).",
)
new_parser.add_argument(
"-d",
"--max_days",
type=lambda d: timedelta(days=int(d)),
default=timedelta(days=7),
help="Maximum number of days after thread posting to count comments toward participation (default 7).",
)
return new_parser


if __name__ == "__main__":
    # Parse CLI options, load the URL list, then run the check.
    args = _get_parser().parse_args()

    with open(args.file, "r") as post_file:
        post_url_list = [line.strip() for line in post_file]

    main(post_url_list, args.percentage, args.max_days)
9 changes: 7 additions & 2 deletions scripts/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,15 @@ def _report_monthly(report_args: argparse.Namespace):
)

banned_users = mod_action_service.count_mod_actions(
"banuser", start_date, end_date, distinct=True, exclude_mod_accounts_list=_bots_and_admins
"banuser", start_date, end_date, distinct=True, exclude_mod_accounts_list=mod_constants.ADMINS
)
permabanned_users = mod_action_service.count_mod_actions(
"banuser", start_date, end_date, distinct=True, details="permanent", exclude_mod_accounts_list=_bots_and_admins
"banuser",
start_date,
end_date,
distinct=True,
details="permanent",
exclude_mod_accounts_list=mod_constants.ADMINS,
)
# banned_users_bots = mod_action_service.count_mod_actions(
# "banuser", start_date, end_date, distinct=True, mod_accounts_list=mod_constants.BOTS
Expand Down
28 changes: 18 additions & 10 deletions src/data/comment_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,27 +61,35 @@ def get_comments_by_post_id(self, post_id: int) -> list[CommentModel]:
return [CommentModel(row) for row in result_rows]

def get_comments_by_username(
    self, username: str, start_date: str = None, end_date: str = None, exclude_cdf: bool = False
) -> list[CommentModel]:
    """Get all comments by a user, optionally within a time window.

    :param username: author to match (compared case-insensitively).
    :param start_date: inclusive ISO-format lower bound on created_time, if any.
    :param end_date: exclusive ISO-format upper bound on created_time, if any.
    :param exclude_cdf: when True, drop comments made on "Casual Discussion
        Fridays" megathreads (matched by post title prefix).
    """
    where_clauses = ["lower(c.author) = :username"]
    sql_kwargs = {"username": username.lower()}

    if start_date:
        where_clauses.append("c.created_time >= :start_date")
        sql_kwargs["start_date"] = start_date

    if end_date:
        where_clauses.append("c.created_time < :end_date")
        sql_kwargs["end_date"] = end_date

    where_str = " AND ".join(where_clauses)

    if exclude_cdf:
        # Join against posts so CDF megathreads can be filtered by title.
        # Select c.* (not *): the joined tables share column names (id, author,
        # created_time, ...) and CommentModel must be built from the comment's
        # columns only.
        sql = text(
            f"""
            SELECT c.* FROM comments c JOIN posts p ON c.post_id = p.id
            WHERE {where_str} AND p.title not like 'Casual Discussion Fridays - Week of %'
            """
        )
    else:
        sql = text(
            f"""
            SELECT * FROM comments c
            WHERE {where_str};
            """
        )

    result_rows = self.execute(sql, **sql_kwargs)
    return [CommentModel(row) for row in result_rows]
Expand Down
4 changes: 3 additions & 1 deletion src/feeds/mod_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,9 @@ def send_discord_message(mod_action: ModActionModel):
embed_json["title"] = f"{mod_action.mod}: {mod_action.action} by {mod_action.target_user}"
elif mod_action.target_post_id:
target = post_service.get_post_by_id(mod_action.target_post_id)
title = discord.escape_formatting(f"{mod_action.mod}: {mod_action.action} - {target.title}")
title = discord.escape_formatting(
f"{mod_action.mod}: {mod_action.action} - {target.title} by {mod_action.target_user}"
)
embed_json["title"] = title[:253] + "..." if len(title) > 256 else title
elif mod_action.target_user:
target = user_service.get_user(mod_action.target_user)
Expand Down
6 changes: 4 additions & 2 deletions src/services/comment_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ def get_comments_by_post_id(post_id: Union[str, int]) -> list[CommentModel]:
return _comment_data.get_comments_by_post_id(post_id)


def get_comments_by_username(
    username: str, start_date: str = None, end_date: str = None, exclude_cdf: bool = False
) -> list[CommentModel]:
    """
    Gets all comments by a user, optionally within a specified time frame.

    :param exclude_cdf: when True, exclude comments made on "Casual Discussion
        Fridays" megathreads.
    """

    return _comment_data.get_comments_by_username(username, start_date, end_date, exclude_cdf)


def count_comments(start_date: date = None, end_date: date = None, exclude_authors: list = None) -> int:
Expand Down
4 changes: 4 additions & 0 deletions src/services/post_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ def format_post_embed(post: PostModel):
if post.metadata and post.metadata.get("spoiler"):
embed_json["fields"].append({"name": "Spoiler", "value": "\u200b", "inline": True})

if post.deleted_time:
deleted_timestamp = int(post.deleted_time.timestamp())
embed_json["fields"].append({"name": "Deleted", "value": f"<t:{deleted_timestamp}:t>", "inline": True})

return embed_json


Expand Down
15 changes: 15 additions & 0 deletions src/utils/reddit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Utilities regarding Reddit posts/users/etc"""

import copy
import re
import typing

import mintotp
Expand All @@ -10,6 +11,20 @@
from data.base_data import BaseModel


# Matches Reddit post links — full URLs on any reddit.com subdomain, redd.it
# short links, or relative "/comments/<id>" paths — and captures the base-36
# post ID in the "id" named group.
# NOTE(review): intended for use with .match(), so it only recognizes links
# anchored at the start of the string — confirm callers rely on that.
POST_ID_REGEX = re.compile(
r"""
(?:
(https?://(?:\w+\.)?reddit\.com(?:/r/anime/comments))| # Regular reddit URLs on any subdomain
(https?://redd\.it)| # Shortened URLs
/comments # Relative links
)
/(?P<id>\w+) # Post ID, the part we care about
(?:/?\w*/?(?:\.compact)?\??)? # Everything afterward is irrelevant
""",
re.VERBOSE,
)


# Digit alphabet for base-36 conversion (Reddit IDs are base-36 integers).
_b36_alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"


Expand Down

0 comments on commit ab7eaa8

Please sign in to comment.