Skip to content

Commit

Permalink
Merge pull request #32 from r-anime/participation_script
Browse files Browse the repository at this point in the history
Add participation check script
  • Loading branch information
durinthal authored Dec 23, 2023
2 parents aa41f3d + 9c14485 commit ab7eaa8
Show file tree
Hide file tree
Showing 9 changed files with 161 additions and 21 deletions.
2 changes: 1 addition & 1 deletion scripts/frontpage.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def _format_line(submission, position, rank_change, total_hours):

line += f" {submission.score:>5}"

line += " {:>24}".format(f"[{submission.link_flair_text}]({submission.id})")
line += " {:>25}".format(f"[{submission.link_flair_text}]({submission.id})")

line += f" <{submission.author.name}>" if submission.author is not None else " <[deleted]>"

Expand Down
20 changes: 15 additions & 5 deletions scripts/modapps.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,13 @@ def process_row(row, activity_start_date, activity_end_date):
# and overall history on /r/anime.
activity_start_time_str = activity_start_date.isoformat()
activity_end_time_str = activity_end_date.isoformat()
user_comments_window = len(
user_comments_window_with_cdf = len(
comment_service.get_comments_by_username(username, activity_start_time_str, activity_end_time_str)
)
user_comments_window = len(
comment_service.get_comments_by_username(username, activity_start_time_str, activity_end_time_str, True)
)
cdf_window = user_comments_window_with_cdf - user_comments_window
user_posts_window = len(
post_service.get_posts_by_username(username, activity_start_time_str, activity_end_time_str)
)
Expand All @@ -59,14 +63,20 @@ def process_row(row, activity_start_date, activity_end_date):
mod_actions[mod_action.action].append(mod_action)
mod_actions_str = ", ".join(f"{action} ({len(action_list)})" for action, action_list in mod_actions.items())

user_comments_total = len(comment_service.get_comments_by_username(username, "2021-06-01"))
user_posts_total = len(post_service.get_posts_by_username(username, "2021-06-01"))
user_comments_total_with_cdf = len(comment_service.get_comments_by_username(username, "2020-01-01"))
user_comments_total = len(comment_service.get_comments_by_username(username, "2020-01-01", exclude_cdf=True))
cdf_total = user_comments_total_with_cdf - user_comments_total
user_posts_total = len(post_service.get_posts_by_username(username, "2020-01-01"))

passes_activity_threshold = "✅" if user_comments_window + user_posts_window > 50 else "❌"

response_body += f"### Activity in past 90 days {passes_activity_threshold}\n\n"
response_body += f"> Comments: {user_comments_window} ({user_comments_total} since 2021-06-01)"
response_body += f" Submissions: {user_posts_window} ({user_posts_total} since 2021-06-01)\n\n"
response_body += f"> Comments excluding CDF: {user_comments_window} ({user_comments_total} since 2020-01-01)"
if cdf_window or cdf_total:
response_body += f" (including CDF: {cdf_window}, {cdf_total} since 2020-01-01)"
else:
response_body += " (no CDF activity)"
response_body += f" Submissions: {user_posts_window} ({user_posts_total} since 2020-01-01)\n\n"
response_body += f"> Mod actions since 2021-01-01: {mod_actions_str}\n\n"

redditor = reddit.redditor(username)
Expand Down
94 changes: 94 additions & 0 deletions scripts/participation_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
Looks for users that commented in a minimum percentage of threads in the list. Use with -h for instructions.
"""

import argparse
from collections import Counter
from datetime import timedelta

from data.post_data import PostModel
from services import post_service, comment_service
from utils import reddit as reddit_utils
from utils.logger import logger


def get_post_users(post: PostModel, max_time_after: timedelta):
    """Return the set of usernames that commented on *post* within the allowed window.

    Comments from deleted/unknown authors are ignored, as are comments created
    more than *max_time_after* after the post itself.
    """
    cutoff = post.created_time + max_time_after
    authors = set()
    for comment in comment_service.get_comments_by_post_id(post.id36):
        # Deleted/unknown users don't count.
        if not comment.author:
            continue
        # Only comments inside the participation window count.
        if comment.created_time <= cutoff:
            authors.add(comment.author)
    return authors


def load_post_list(post_list: list[str]) -> list[str]:
    """Parse a list of post URLs into base-36 post IDs.

    :param post_list: lines from the input file, one post URL per line.
    :return: post IDs extracted from the recognized URLs, in input order.

    Lines that don't look like a Reddit post URL are skipped with a warning;
    the previous version dropped them silently, which made typos in the input
    file impossible to notice.
    """
    post_id_list = []
    for post_url in post_list:
        parsed_post_id = reddit_utils.POST_ID_REGEX.match(post_url)
        if not parsed_post_id:
            # Blank lines are expected (trailing newline etc.); anything else
            # is probably a typo the operator should know about.
            if post_url:
                logger.warning(f"Unrecognized post URL, skipping: {post_url}")
            continue

        post_id_list.append(parsed_post_id.groupdict().get("id"))
    return post_id_list


def main(post_list: list[str], min_percentage: float, max_time_after: timedelta):
    """Log every user who commented in at least *min_percentage* of the given threads.

    :param post_list: post URLs to check (unparseable entries are skipped).
    :param min_percentage: minimum participation percentage (0-100) to be listed.
    :param max_time_after: how long after each thread's creation comments still count.
    """
    post_id_list = load_post_list(post_list)
    post_with_users = {}
    for post_id in post_id_list:
        post = post_service.get_post_by_id(post_id)
        if not post:
            logger.warning(f"Post with ID {post_id} not found")
            continue

        user_set = get_post_users(post, max_time_after)
        # The thread author counts as a participant too; guard against
        # deleted/unknown authors so None doesn't show up as a "user".
        if post.author:
            user_set.add(post.author)
        post_with_users[post] = list(user_set)

    post_count = len(post_with_users)
    # Avoid ZeroDivisionError when no post in the list could be resolved.
    if not post_count:
        logger.warning("No posts found, nothing to check")
        return

    user_counts = Counter(username for user_list in post_with_users.values() for username in user_list)
    for username, user_count in user_counts.most_common():
        # Scale before rounding: the old int(round(ratio, 2) * 100) truncated
        # through float error, e.g. 29/100 -> 0.29 -> 28.999... -> 28.
        user_percentage = round(100 * user_count / post_count)
        if user_percentage < min_percentage:
            break
        logger.info(f"{username:>22}: {user_percentage:>3}% ({user_count:3} / {post_count:<3})")


def _get_parser() -> argparse.ArgumentParser:
new_parser = argparse.ArgumentParser(description="Check users who commented on a post.")
new_parser.add_argument(
"-f", "--file", action="store", required=True, help="File path to list of post URLs to check."
)
new_parser.add_argument(
"-p",
"--percentage",
action="store",
type=int,
default=80,
help="Minimum percentage of threads required (default 80).",
)
new_parser.add_argument(
"-d",
"--max_days",
type=lambda d: timedelta(days=int(d)),
default=timedelta(days=7),
help="Maximum number of days after thread posting to count comments toward participation (default 7).",
)
return new_parser


if __name__ == "__main__":
    # Parse CLI options, load the URL list, then run the check.
    args = _get_parser().parse_args()

    with open(args.file, "r") as post_file:
        post_url_list = [line.strip() for line in post_file]

    main(post_url_list, args.percentage, args.max_days)
9 changes: 7 additions & 2 deletions scripts/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,15 @@ def _report_monthly(report_args: argparse.Namespace):
)

banned_users = mod_action_service.count_mod_actions(
"banuser", start_date, end_date, distinct=True, exclude_mod_accounts_list=_bots_and_admins
"banuser", start_date, end_date, distinct=True, exclude_mod_accounts_list=mod_constants.ADMINS
)
permabanned_users = mod_action_service.count_mod_actions(
"banuser", start_date, end_date, distinct=True, details="permanent", exclude_mod_accounts_list=_bots_and_admins
"banuser",
start_date,
end_date,
distinct=True,
details="permanent",
exclude_mod_accounts_list=mod_constants.ADMINS,
)
# banned_users_bots = mod_action_service.count_mod_actions(
# "banuser", start_date, end_date, distinct=True, mod_accounts_list=mod_constants.BOTS
Expand Down
28 changes: 18 additions & 10 deletions src/data/comment_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,27 +61,35 @@ def get_comments_by_post_id(self, post_id: int) -> list[CommentModel]:
return [CommentModel(row) for row in result_rows]

def get_comments_by_username(
    self, username: str, start_date: str = None, end_date: str = None, exclude_cdf: bool = False
) -> list[CommentModel]:
    """Get all comments by a user, optionally within a time window.

    :param username: author to match (compared case-insensitively).
    :param start_date: inclusive ISO-format lower bound on created_time, if any.
    :param end_date: exclusive ISO-format upper bound on created_time, if any.
    :param exclude_cdf: when True, drop comments made on "Casual Discussion
        Fridays" megathreads (matched by post title prefix).
    """
    where_clauses = ["lower(c.author) = :username"]
    sql_kwargs = {"username": username.lower()}

    if start_date:
        where_clauses.append("c.created_time >= :start_date")
        sql_kwargs["start_date"] = start_date

    if end_date:
        where_clauses.append("c.created_time < :end_date")
        sql_kwargs["end_date"] = end_date

    where_str = " AND ".join(where_clauses)

    if exclude_cdf:
        # Join against posts so CDF megathreads can be filtered by title.
        # Select c.* (not *): the joined tables share column names (id, author,
        # created_time, ...) and CommentModel must be built from the comment's
        # columns only.
        sql = text(
            f"""
            SELECT c.* FROM comments c JOIN posts p ON c.post_id = p.id
            WHERE {where_str} AND p.title not like 'Casual Discussion Fridays - Week of %'
            """
        )
    else:
        sql = text(
            f"""
            SELECT * FROM comments c
            WHERE {where_str};
            """
        )

    result_rows = self.execute(sql, **sql_kwargs)
    return [CommentModel(row) for row in result_rows]
Expand Down
4 changes: 3 additions & 1 deletion src/feeds/mod_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,9 @@ def send_discord_message(mod_action: ModActionModel):
embed_json["title"] = f"{mod_action.mod}: {mod_action.action} by {mod_action.target_user}"
elif mod_action.target_post_id:
target = post_service.get_post_by_id(mod_action.target_post_id)
title = discord.escape_formatting(f"{mod_action.mod}: {mod_action.action} - {target.title}")
title = discord.escape_formatting(
f"{mod_action.mod}: {mod_action.action} - {target.title} by {mod_action.target_user}"
)
embed_json["title"] = title[:253] + "..." if len(title) > 256 else title
elif mod_action.target_user:
target = user_service.get_user(mod_action.target_user)
Expand Down
6 changes: 4 additions & 2 deletions src/services/comment_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ def get_comments_by_post_id(post_id: Union[str, int]) -> list[CommentModel]:
return _comment_data.get_comments_by_post_id(post_id)


def get_comments_by_username(
    username: str, start_date: str = None, end_date: str = None, exclude_cdf: bool = False
) -> list[CommentModel]:
    """
    Gets all comments by a user, optionally within a specified time frame.

    :param exclude_cdf: when True, exclude comments made on "Casual Discussion
        Fridays" megathreads.
    """

    return _comment_data.get_comments_by_username(username, start_date, end_date, exclude_cdf)


def count_comments(start_date: date = None, end_date: date = None, exclude_authors: list = None) -> int:
Expand Down
4 changes: 4 additions & 0 deletions src/services/post_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ def format_post_embed(post: PostModel):
if post.metadata and post.metadata.get("spoiler"):
embed_json["fields"].append({"name": "Spoiler", "value": "\u200b", "inline": True})

if post.deleted_time:
deleted_timestamp = int(post.deleted_time.timestamp())
embed_json["fields"].append({"name": "Deleted", "value": f"<t:{deleted_timestamp}:t>", "inline": True})

return embed_json


Expand Down
15 changes: 15 additions & 0 deletions src/utils/reddit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Utilities regarding Reddit posts/users/etc"""

import copy
import re
import typing

import mintotp
Expand All @@ -10,6 +11,20 @@
from data.base_data import BaseModel


# Matches Reddit post links — full URLs on any reddit.com subdomain, redd.it
# short links, or relative "/comments/<id>" paths — and captures the base-36
# post ID in the "id" named group.
# NOTE(review): intended for use with .match(), so it only recognizes links
# anchored at the start of the string — confirm callers rely on that.
POST_ID_REGEX = re.compile(
r"""
(?:
(https?://(?:\w+\.)?reddit\.com(?:/r/anime/comments))| # Regular reddit URLs on any subdomain
(https?://redd\.it)| # Shortened URLs
/comments # Relative links
)
/(?P<id>\w+) # Post ID, the part we care about
(?:/?\w*/?(?:\.compact)?\??)? # Everything afterward is irrelevant
""",
re.VERBOSE,
)


# Digit alphabet for base-36 conversion (Reddit IDs are base-36 integers).
_b36_alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"


Expand Down

0 comments on commit ab7eaa8

Please sign in to comment.