From e4e3e5d50feaf99fb3f6b410281a403cd0923b17 Mon Sep 17 00:00:00 2001
From: Andrei Burd <bandrei@yotpo.com>
Date: Mon, 23 Feb 2015 17:13:36 +0200
Subject: [PATCH 1/2] Added CloudWatch reporting

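When --cloudwatch-report is passed, the replicationLag, LockPercentage and
CurrentLockPercentage values are published to the 'Mongo' CloudWatch
namespace, once per replica set and once per instance within the replica set.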
---
 check_mongodb.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/check_mongodb.py b/check_mongodb.py
index bc6278a..72fbf45 100755
--- a/check_mongodb.py
+++ b/check_mongodb.py
@@ -18,18 +18,23 @@
 #   - @Andor on github
 #   - Steven Richards - Captainkrtek on github
 #   - Max Vernimmen
+#   - @burdandrei Added CloudWatch monitoring for Mongodb
 #
 # USAGE
 #
 # See the README.md
 #
 
+
 import sys
 import time
+import datetime
 import optparse
 import textwrap
 import re
 import os
+import commands
+from boto.ec2.cloudwatch import CloudWatchConnection
 
 try:
     import pymongo
@@ -44,6 +49,7 @@
 else:
     import pymongo.son as son
 
+cloudwatch_report = False
 
 #
 # thanks to http://stackoverflow.com/a/1229667/72987
@@ -82,6 +88,17 @@ def numeric_type(param):
         return True
     return False
 
+#Get the instanceId for our machine. This is important later for
+#autoscaling. The dimensions we select here when publishing
+#must be matched later by our autoscale policy
+def get_instance_id():
+    ret, instanceId = commands.getstatusoutput("wget -q -O - http://169.254.169.254/latest/meta-data/instance-id")
+    return instanceId
+
+def put_data(namespace, name, value, unit, dimensions):
+    c = CloudWatchConnection()
+    now = datetime.datetime.now()
+    c.put_metric_data(namespace, name, value, now, unit, dimensions)
 
 def check_levels(param, warning, critical, message, ok=[]):
     if (numeric_type(critical) and numeric_type(warning)):
@@ -145,6 +162,7 @@ def main(argv):
     p.add_option('-q', '--querytype', action='store', dest='query_type', default='query', help='The query type to check [query|insert|update|delete|getmore|command] from queries_per_second')
     p.add_option('-c', '--collection', action='store', dest='collection', default='admin', help='Specify the collection to check')
     p.add_option('-T', '--time', action='store', type='int', dest='sample_time', default=1, help='Time used to sample number of pages faults')
+    p.add_option('--cloudwatch-report', action='store_true', dest='cloudwatch_report', default=False,help='Report sampled data to cloudwatch')
 
     options, arguments = p.parse_args()
     host = options.host
@@ -167,6 +185,8 @@ def main(argv):
     database = options.database
     ssl = options.ssl
     replicaset = options.replicaset
+    global cloudwatch_report
+    cloudwatch_report = options.cloudwatch_report
 
     if action == 'replica_primary' and replicaset is None:
         return "replicaset must be passed in when using replica_primary check"
@@ -438,6 +458,12 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
             except:
                 lag = float(optime_lag.seconds + optime_lag.days * 24 * 3600)
 
+            if cloudwatch_report:
+                replicaset = rs_status["set"]
+                instanceId = get_instance_id()
+                put_data('Mongo', 'replicationLag', lag, 'Seconds',{'replicaSet': replicaset})
+                put_data('Mongo', 'replicationLag', lag, 'Seconds',{'InstanceId': instanceId, 'replicaSet': replicaset})
+
             if percent:
                 err, con = mongo_connect(primary_node['name'].split(':')[0], int(primary_node['name'].split(':')[1]), False, user, passwd)
                 if err != 0:
@@ -617,6 +643,11 @@ def check_lock(con, warning, critical, perf_data):
             lock_percentage = float(lockTime) / float(totalTime) * 100
         message = "Lock Percentage: %.2f%%" % lock_percentage
         message += performance_data(perf_data, [("%.2f" % lock_percentage, "lock_percentage", warning, critical)])
+        if cloudwatch_report:
+            instanceId = get_instance_id()
+            replicaset = data['repl']['setName']
+            put_data('Mongo', 'LockPercentage', lock_percentage, 'Percent',{'replicaSet': replicaset})
+            put_data('Mongo', 'LockPercentage', lock_percentage, 'Percent',{'InstanceId': instanceId, 'replicaSet': replicaset})
         return check_levels(lock_percentage, warning, critical, message)
 
     except Exception, e:
@@ -1130,6 +1161,11 @@ def check_current_lock(con, host, warning, critical, perf_data):
         lock_percentage = delta[2] / delta[1] * 100     # lockTime/totalTime*100
         message = "Current Lock Percentage: %.2f%%" % lock_percentage
         message += performance_data(perf_data, [("%.2f" % lock_percentage, "current_lock_percentage", warning, critical)])
+        if cloudwatch_report:
+            instanceId = get_instance_id()
+            replicaset = data['repl']['setName']
+            put_data('Mongo', 'CurrentLockPercentage', lock_percentage, 'Percent',{'replicaSet': replicaset})
+            put_data('Mongo', 'CurrentLockPercentage', lock_percentage, 'Percent',{'InstanceId': instanceId, 'replicaSet': replicaset})
         return check_levels(lock_percentage, warning, critical, message)
     else:
         return exit_with_general_warning("problem reading data from temp file")

From 4379f6f5e33bc0512413168fc1e00654423e47f5 Mon Sep 17 00:00:00 2001
From: Andrei Burd <bandrei@yotpo.com>
Date: Tue, 24 Feb 2015 16:21:49 +0200
Subject: [PATCH 2/2] Fix boto dependency issues

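Moved the commands, datetime and boto imports inside the CloudWatch-related
functions (get_instance_id and put_data), so boto is no longer imported when
the script is loaded; it is only imported when a metric is actually pushed.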
---
 check_mongodb.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/check_mongodb.py b/check_mongodb.py
index 72fbf45..450f00c 100755
--- a/check_mongodb.py
+++ b/check_mongodb.py
@@ -25,16 +25,12 @@
 # See the README.md
 #
 
-
 import sys
 import time
-import datetime
 import optparse
 import textwrap
 import re
 import os
-import commands
-from boto.ec2.cloudwatch import CloudWatchConnection
 
 try:
     import pymongo
@@ -92,10 +88,13 @@ def numeric_type(param):
 #autoscaling. The dimensions we select here when publishing
 #must be matched later by our autoscale policy
 def get_instance_id():
+    import commands
     ret, instanceId = commands.getstatusoutput("wget -q -O - http://169.254.169.254/latest/meta-data/instance-id")
     return instanceId
 
 def put_data(namespace, name, value, unit, dimensions):
+    import datetime
+    from boto.ec2.cloudwatch import CloudWatchConnection
     c = CloudWatchConnection()
     now = datetime.datetime.now()
     c.put_metric_data(namespace, name, value, now, unit, dimensions)