Skip to content

Commit

Permalink
Reduce CPU sampling size
Browse files Browse the repository at this point in the history
Add an env var to override `check_existing` flag
Remove all top level vars
Create a class for GitHub functionalities
  • Loading branch information
dormant-user committed Jun 7, 2024
1 parent 510a703 commit fe39e02
Show file tree
Hide file tree
Showing 5 changed files with 237 additions and 158 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,18 @@ Monitor that runs in the background to report the health status of Jarvis and it
**Mandatory**
- **source_map** - Path to the processes.yaml file generated by [Jarvis][4]
- **git_user** - GitHub username to authenticate `GitPython`
- **git_token** - GitHub PAT to push changes using GH API and authenticate `GitPython`

**Optional**
- **debug** - Any value to enable debug mode. Disabled by default.
- **git_user** - GitHub username to authenticate GitPython.
- **git_owner** - GitHub owner for the account.
- **git_token** - GitHub PAT to push changes using GH API.
- **git_owner** - GitHub owner for the account. Defaults to `thevickypedia`
- **gmail_user** - Gmail username to authenticate the account.
- **gmail_pass** - Gmail password to authenticate the account.
- **recipient** - Email address to send an email notification.
- **skip_schedule** - Skip the monitoring schedule at a particular time. Example: `12:00 AM`
- **check_existing** - Check existing `index.html` file for changes, before executing `push`. Defaults to `True`
- **override_check** - List of `minutes` to set the `check_existing` flag as `False`. Defaults to `[0]` (every hour)

[1]: https://github.com/thevickypedia/Jarvis
[2]: https://jarvis-health.vigneshrao.com
Expand Down
94 changes: 59 additions & 35 deletions models/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import sys
import time
from datetime import datetime
from typing import Union
from typing import List, Union

from pydantic import BaseModel, EmailStr, FilePath, HttpUrl, NewPath
from pydantic_settings import BaseSettings
Expand Down Expand Up @@ -45,8 +45,9 @@ class EnvConfig(BaseSettings):
gmail_user: Union[EmailStr, None] = None
gmail_pass: Union[str, None] = None
recipient: Union[EmailStr, None] = None
skip_schedule: str = None
skip_schedule: Union[str, None] = None
check_existing: bool = True
override_check: List[int] = [0]

class Config:
"""Environment variables configuration."""
Expand All @@ -60,51 +61,65 @@ class Config:


class ColorCode(BaseModel):
    """Color codes for red and green status indicators.

    >>> ColorCode

    """

    red: str = "🔴"  # large red circle
    green: str = "🟢"  # large green circle
    blue: str = "🔵"  # large blue circle
    yellow: str = "🟡"  # large yellow circle


LOGGER = logging.getLogger("jarvis")
DEFAULT_LOG_FORMAT = logging.Formatter(
datefmt="%b-%d-%Y %I:%M:%S %p",
fmt="%(asctime)s - %(levelname)s - [%(module)s:%(lineno)d] - %(funcName)s - %(message)s",
)
LOG_FILE: str = datetime.now().strftime(os.path.join("logs", "jarvis_%d-%m-%Y.log"))
if env.log == LogOptions.file:
if not os.path.isdir("logs"):
os.mkdir("logs")
HANDLER = logging.FileHandler(filename=LOG_FILE, mode="a")
write: str = "".join(["*" for _ in range(120)])
with open(LOG_FILE, "a+") as file:
file.seek(0)
if not file.read():
file.write(f"{write}\n")
else:
file.write(f"\n{write}\n")
file.flush()
else:
HANDLER = logging.StreamHandler()
HANDLER.setFormatter(fmt=DEFAULT_LOG_FORMAT)
LOGGER.addHandler(hdlr=HANDLER)
def get_logger(name: str) -> logging.Logger:
    """Customize logger as per the environment variables set.

    Args:
        name: Name of the logger.

    Returns:
        logging.Logger:
        Returns the customized logger.
    """
    logger = logging.getLogger(name)
    # One log file per day, e.g. logs/jarvis_07-06-2024.log
    log_file = datetime.now().strftime(os.path.join("logs", "jarvis_%d-%m-%Y.log"))
    if env.log == LogOptions.file:
        if not os.path.isdir("logs"):
            os.mkdir("logs")
        handler = logging.FileHandler(filename=log_file, mode="a")
        # Separator line written between runs so appended sessions are distinguishable
        write: str = "*" * 120
        with open(log_file, "a+") as file:
            file.seek(0)
            if not file.read():
                file.write(f"{write}\n")
            else:
                file.write(f"\n{write}\n")
            file.flush()
    else:
        handler = logging.StreamHandler()
    handler.setFormatter(
        fmt=logging.Formatter(
            datefmt="%b-%d-%Y %I:%M:%S %p",
            fmt="%(asctime)s - %(levelname)s - [%(module)s:%(lineno)d] - %(funcName)s - %(message)s",
        )
    )
    logger.addHandler(hdlr=handler)
    # Debug flag controls verbosity; defaults to INFO
    if env.debug:
        logger.setLevel(level=logging.DEBUG)
    else:
        logger.setLevel(level=logging.INFO)
    return logger


def get_webpage() -> Union[str, None]:
"""Tries to get the hosted webpage from CNAME file if available in docs directory."""
"""Tries to get the hosted webpage from CNAME file if available in docs directory.
Returns:
str:
Returns the website mentioned in the CNAME file.
"""
try:
with open(os.path.join("docs", "CNAME")) as f:
return f.read().strip()
Expand All @@ -119,6 +134,7 @@ class Constants(BaseModel):
"""

SKIPPER_FORMAT: str = "%H:%M"
TIMEZONE: str = time.strftime("%Z %z")
DATETIME: str = datetime.now().strftime("%B %d, %Y - %I:%M %p") + " " + TIMEZONE
NOTIFICATION: Union[FilePath, NewPath] = os.path.join(
Expand All @@ -135,3 +151,11 @@ class Constants(BaseModel):

static = Constants()
color_codes = ColorCode()
LOGGER = get_logger("jarvis")

if env.skip_schedule:
try:
# Validate datetime format
datetime.strptime(env.skip_schedule, static.SKIPPER_FORMAT)
except ValueError as error:
LOGGER.warning(error)
20 changes: 16 additions & 4 deletions models/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,18 @@
from models.constants import LOGGER, env, static


def check_cpu_util(process: psutil.Process) -> Dict:
"""Check CPU utilization, number of threads and open files."""
def check_performance(process: psutil.Process) -> Dict[str, int]:
"""Checks performance by monitoring CPU utilization, number of threads and open files.
Args:
process: Process object.
Returns:
Dict[str, int]:
Returns a dictionary of metrics and their values as key-value pair.
"""
name = process.func # noqa
cpu = process.cpu_percent(interval=3)
cpu = process.cpu_percent(interval=0.5)
threads = process.num_threads()
open_files = len(process.open_files())
info_dict = {"cpu": cpu, "threads": threads, "open_files": open_files}
Expand All @@ -25,7 +33,11 @@ def check_cpu_util(process: psutil.Process) -> Dict:


def send_email(status: dict = None) -> None:
"""Sends an email notification if Jarvis is down."""
"""Sends an email notification if Jarvis is down.
Args:
status: Translated status dictionary.
"""
if not all((env.gmail_user, env.gmail_pass, env.recipient)):
LOGGER.warning("Not all env vars are present for sending an email!!")
return
Expand Down
46 changes: 33 additions & 13 deletions monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from models.conditions import all_pids_are_red, main_process_is_red, some_pids_are_red
from models.constants import LOGGER, color_codes, env, static
from models.helper import check_cpu_util, send_email
from models.helper import check_performance, send_email

STATUS_DICT = {}

Expand All @@ -26,7 +26,11 @@ def get_data() -> Dict[str, Dict[int, List[str]]]:


def publish_docs(status: dict = None) -> None:
"""Updates the docs/index.html file."""
"""Updates the docs/index.html file.
Args:
status: Translated status dictionary.
"""
LOGGER.info("Updating index.html")
t_desc, l_desc = "", ""
if not status: # process map is missing
Expand Down Expand Up @@ -76,11 +80,20 @@ def publish_docs(status: dict = None) -> None:
file.flush()


def classify_processes(process: psutil.Process, proc_impact: List[str]):
"""Classify all processes into good, bad and evil."""
def classify_processes(process: psutil.Process, proc_impact: List[str]) -> None:
"""Classify all processes into good (green - all ok), bad (yellow - degraded performance) and evil (red - bad PID).
Args:
process: Process object.
proc_impact: Impact because of the process.
Raises:
Exception:
Raises a bare exception to notify the worker, that the thread has failed.
"""
func_name = process.func # noqa
if psutil.pid_exists(process.pid) and process.status() == psutil.STATUS_RUNNING:
if issue := check_cpu_util(process=process):
if issue := check_performance(process=process):
LOGGER.info("%s [%d] is INTENSE", func_name, process.pid)
# combine list of string with list of tuples
proc_impact.append(
Expand All @@ -97,7 +110,16 @@ def classify_processes(process: psutil.Process, proc_impact: List[str]):


def extract_proc_info(func_name: str, proc_info: Dict[int, List[str]]):
"""Extract process information from PID and classify the process to update status dictionary."""
"""Validates the process ID and calls the classifier function.
Args:
func_name: Function name.
proc_info: Process information as a dictionary.
Raises:
Exception:
Raises a bare exception to notify the worker, that the thread has failed.
"""
for pid, impact in proc_info.items():
try:
process = psutil.Process(pid=pid)
Expand All @@ -112,19 +134,15 @@ def extract_proc_info(func_name: str, proc_info: Dict[int, List[str]]):

def main() -> None:
"""Checks the health of all processes in the mapping and actions accordingly."""
if env.skip_schedule == datetime.now().strftime("%I:%M %p"):
LOGGER.info("Schedule ignored at '%s'", env.skip_schedule)
return
# Enforce check_existing flag to False every 1 hour
if datetime.now().minute == 0:
if datetime.now().minute in env.override_check:
env.check_existing = False
LOGGER.info("Monitoring processes health at: %s", static.DATETIME)
if not (data := get_data()):
publish_docs()
return
notify = False
futures = {}
with ThreadPoolExecutor(max_workers=len(data) * 2) as executor:
with ThreadPoolExecutor(max_workers=len(data)) as executor:
for key, value in data.items():
future = executor.submit(
extract_proc_info, **dict(func_name=key, proc_info=value)
Expand All @@ -133,7 +151,9 @@ def main() -> None:
for future in as_completed(futures):
if future.exception():
LOGGER.error(
f"Thread processing for {futures[future]!r} received an exception: {future.exception()}"
"Thread processing for '%s' received an exception: %s",
futures[future],
future.exception(),
)
notify = True
data_keys = sorted(data.keys())
Expand Down
Loading

0 comments on commit fe39e02

Please sign in to comment.