Skip to content

Commit

Permalink
Merge pull request #59 from PhuNguyenMTT/couchbase_20012022
Browse files Browse the repository at this point in the history
LGTM
  • Loading branch information
arunpatyal authored Feb 14, 2022
2 parents 5557ebc + 69c982f commit be74295
Show file tree
Hide file tree
Showing 9 changed files with 729 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,4 @@ module "sumologic-metrics-monitor" {
- [MariaDB](./monitor_packages/MariaDB)
- [Oracle](./monitor_packages/Oracle)
- [Squid Proxy](https://github.com/SumoLogic/terraform-sumologic-sumo-logic-monitor/tree/main/monitor_packages/SquidProxy)
- [Couchbase](https://github.com/SumoLogic/terraform-sumologic-sumo-logic-monitor/tree/main/monitor_packages/Couchbase)
19 changes: 19 additions & 0 deletions monitor_packages/Couchbase/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# sumologic-couchbase-monitors

This Terraform script configures pre-packaged Sumo Logic Monitors for Couchbase using Terraform modules.

For installation and configuration, please see the Sumo Logic Couchbase [Help Document](https://help.sumologic.com/07Sumo-Logic-Apps/24Web_Servers/Couchbase).
## License

The Couchbase Terraform script is licensed under the Apache License, Version 2.0.

## Issues

Raise issues at [Issues](https://github.com/SumoLogic/terraform-sumologic-sumo-logic-monitor/issues)

## Contributing

* Fork the project on [GitHub](https://github.com/SumoLogic/terraform-sumologic-sumo-logic-monitor).
* Add your feature or bug fix, write tests, and commit.
* Create a pull request with one of the maintainers as reviewer.
13 changes: 13 additions & 0 deletions monitor_packages/Couchbase/couchbase.auto.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Sumo Logic
# Please replace <YOUR SUMO ACCESS ID> (including brackets) with your Sumo Access ID. https://help.sumologic.com/Manage/Security/Access-Keys
access_id = "<YOUR SUMO ACCESS ID>"
# Please replace <YOUR SUMO ACCESS KEY> (including brackets) with your Sumo Access Key.
access_key = "<YOUR SUMO ACCESS KEY>"
# Please update with your deployment, refer: https://help.sumologic.com/APIs/General-API-Information/Sumo-Logic-Endpoints-and-Firewall-Security
environment = "<DEPLOYMENT>"
# This flag determines whether all monitors are disabled (true) or enabled (false).
monitors_disabled = true
# The Sumo Logic monitors will be installed in a folder specified by this value.
folder = "Couchbase"
# Sumo Logic Couchbase clusters filter. For example: db_cluster=couchbase.prod.01
couchbase_data_source = ""
281 changes: 281 additions & 0 deletions monitor_packages/Couchbase/couchbase.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
{
"name": "Couchbase",
"description": "Folder for Couchbase Monitors",
"type": "MonitorsLibraryFolderExport",
"children": [
{
"name": "Couchbase - High Memory Usage",
"description": "This alert fires when memory usage on a node in a Couchbase cluster is high.",
"type": "MonitorsLibraryMonitorExport",
"monitorType": "Metrics",
"evaluationDelay": "0m",
"alertName": null,
"runAs": null,
"notificationGroupFields": [],
"queries": [
{
"rowId": "A",
"query": "metric=couchbase_node_memory_free db_cluster=* host=* | avg by db_cluster,host"
},
{
"rowId": "B",
"query": "metric=couchbase_node_memory_total db_cluster=* host=* | avg by db_cluster,host"
},
{
"rowId": "C",
"query": "(1-#A/#B)*100 along db_cluster,host"
}
],
"triggers": [
{
"detectionMethod": "MetricsStaticCondition",
"triggerType": "Critical",
"timeRange": "-5m",
"threshold": 80,
"thresholdType": "GreaterThanOrEqual",
"occurrenceType": "Always"
},
{
"detectionMethod": "MetricsStaticCondition",
"triggerType": "ResolvedCritical",
"timeRange": "-5m",
"threshold": 80,
"thresholdType": "LessThan",
"occurrenceType": "Always"
}
],
"notifications": [],
"isDisabled": true,
"groupNotifications": true,
"playbook": ""
},
{
"name": "Couchbase - Too Many Login Failures",
"description": "This alert fires when there are too many login failures to a node in a Couchbase cluster.",
"type": "MonitorsLibraryMonitorExport",
"monitorType": "Logs",
"evaluationDelay": "0m",
"alertName": null,
"runAs": null,
"notificationGroupFields": [],
"queries": [
{
"rowId": "A",
"query": "db_cluster=* db_system=\"couchbase\" \"login failure\"\n| json \"log\" as _rawlog nodrop\n| if(isEmpty(_rawlog),_raw,_rawlog) as _raw\n| json \"name\" as event_name\n| where event_name=\"login failure\"\n| json \"remote.ip\" as client_ip \n| json \"local.ip\" as couchbase_server\n| count as count by db_cluster, couchbase_server"
}
],
"triggers": [
{
"detectionMethod": "LogsStaticCondition",
"triggerType": "Critical",
"timeRange": "-5m",
"threshold": 1000,
"thresholdType": "GreaterThanOrEqual",
"field": "count"
},
{
"detectionMethod": "LogsStaticCondition",
"triggerType": "ResolvedCritical",
"timeRange": "-5m",
"threshold": 1000,
"thresholdType": "LessThan",
"field": "count"
}
],
"notifications": [],
"isDisabled": true,
"groupNotifications": true,
"playbook": ""
},
{
"name": "Couchbase - High CPU Usage",
"description": "This alert fires when CPU usage on a node in a Couchbase cluster is high.",
"type": "MonitorsLibraryMonitorExport",
"monitorType": "Metrics",
"evaluationDelay": "0m",
"alertName": null,
"runAs": null,
"notificationGroupFields": [],
"queries": [
{
"rowId": "A",
"query": "metric=couchbase_bucket_cpu_utilization_rate db_cluster=* host=* | avg by db_cluster,host"
}
],
"triggers": [
{
"detectionMethod": "MetricsStaticCondition",
"triggerType": "Critical",
"timeRange": "-5m",
"threshold": 80,
"thresholdType": "GreaterThanOrEqual",
"occurrenceType": "Always"
},
{
"detectionMethod": "MetricsStaticCondition",
"triggerType": "ResolvedCritical",
"timeRange": "-5m",
"threshold": 80,
"thresholdType": "LessThan",
"occurrenceType": "Always"
}
],
"notifications": [],
"isDisabled": true,
"groupNotifications": true,
"playbook": ""
},
{
"name": "Couchbase - Node Down",
"description": "This alert fires when a node in the Couchbase cluster is down.",
"type": "MonitorsLibraryMonitorExport",
"monitorType": "Logs",
"evaluationDelay": "0m",
"alertName": null,
"runAs": null,
"notificationGroupFields": [],
"queries": [
{
"rowId": "A",
"query": "db_cluster=* db_system=\"couchbase\" \"error\" \"nodedown\"\n| json \"log\" as _rawlog nodrop\n| if(isEmpty(_rawlog),_raw,_rawlog) as _raw\n| if (isEmpty(pod),_sourceHost,pod) as host\n|replace (_raw,/\\s+/,\" \") as _raw\n|parse regex \"nodedown,\\s'\\S+@(?<node>\\S+)\\'\"\n| parse regex \"ns_server:error,(?<time>\\S+),\"\n|_raw as msg \n| fields db_cluster,node"
}
],
"triggers": [
{
"detectionMethod": "LogsStaticCondition",
"triggerType": "Critical",
"timeRange": "-5m",
"threshold": 0,
"thresholdType": "GreaterThan",
"field": ""
},
{
"detectionMethod": "LogsStaticCondition",
"triggerType": "ResolvedCritical",
"timeRange": "-5m",
"threshold": 0,
"thresholdType": "LessThanOrEqual",
"field": ""
}
],
"notifications": [],
"isDisabled": true,
"groupNotifications": true,
"playbook": ""
},
{
"name": "Couchbase - Node Not Respond",
"description": "This alert fires when a node in the Couchbase cluster does not respond too many times.",
"type": "MonitorsLibraryMonitorExport",
"monitorType": "Logs",
"evaluationDelay": "0m",
"alertName": null,
"runAs": null,
"notificationGroupFields": [],
"queries": [
{
"rowId": "A",
"query": "db_cluster=* db_system=\"couchbase\" \"error\" \"Some nodes didn't respond\" \n| json \"log\" as _rawlog nodrop\n| if(isEmpty(_rawlog),_raw,_rawlog) as _raw\n|replace (_raw,/\\s+/,\" \") as _raw\n| parse regex \"stats:error,(?<time>\\S+),\"\n| parse regex \"Some nodes didn't respond: \\[(?<temp_nodes>.+)\\]\" | parse regex field=temp_nodes \"\\'(?<node_temp>[^,]+)\\'\" multi | parse regex field=node_temp \"@(?<node>.+)\"| _raw as msg \n| count as count by db_cluster,node"
}
],
"triggers": [
{
"detectionMethod": "LogsStaticCondition",
"triggerType": "Critical",
"timeRange": "-5m",
"threshold": 10,
"thresholdType": "GreaterThanOrEqual",
"field": "count"
},
{
"detectionMethod": "LogsStaticCondition",
"triggerType": "ResolvedCritical",
"timeRange": "-5m",
"threshold": 10,
"thresholdType": "LessThan",
"field": "count"
}
],
"notifications": [],
"isDisabled": true,
"groupNotifications": true,
"playbook": ""
},
{
"name": "Couchbase - Bucket Not Ready",
"description": "This alert fires when a bucket in the Couchbase cluster is not ready.",
"type": "MonitorsLibraryMonitorExport",
"monitorType": "Logs",
"evaluationDelay": "0m",
"alertName": null,
"runAs": null,
"notificationGroupFields": [],
"queries": [
{
"rowId": "A",
"query": "db_cluster=* db_system=\"couchbase\" \"buckets became not ready on node\" \"error\"\n| json \"log\" as _rawlog nodrop\n| if(isEmpty(_rawlog),_raw,_rawlog) as _raw\n| if (isEmpty(pod),_sourceHost,pod) as host\n|replace (_raw,/\\s+/,\" \") as _raw\n| parse regex \"\\'\\S+@(?<node>\\S+)\\'\\:\\s+\\[(?<buckets>.+)\\],\" | parse regex field=buckets \"\\\"(?<bucket>[^,]+)\\\"\" multi\n| parse regex \"ns_server:error,(?<time>\\S+),\" | _raw as msg\n| fields db_cluster,bucket,node"
}
],
"triggers": [
{
"detectionMethod": "LogsStaticCondition",
"triggerType": "Critical",
"timeRange": "-5m",
"threshold": 0,
"thresholdType": "GreaterThan",
"field": ""
},
{
"detectionMethod": "LogsStaticCondition",
"triggerType": "ResolvedCritical",
"timeRange": "-5m",
"threshold": 0,
"thresholdType": "LessThanOrEqual",
"field": ""
}
],
"notifications": [],
"isDisabled": true,
"groupNotifications": true,
"playbook": ""
},
{
"name": "Couchbase - Too Many Error Queries on Buckets",
"description": "This alert fires when there are too many error queries on a bucket in a Couchbase cluster.",
"type": "MonitorsLibraryMonitorExport",
"monitorType": "Logs",
"evaluationDelay": "0m",
"alertName": null,
"runAs": null,
"notificationGroupFields": [],
"queries": [
{
"rowId": "A",
"query": "db_cluster=* db_system=\"couchbase\" (\"ERROR\" or \"Error\")\n| json \"log\" as _rawlog nodrop\n| if(isEmpty(_rawlog),_raw,_rawlog) as _raw\n| if (isEmpty(pod),_sourceHost,pod) as host\n| parse regex \"_time=(?<time>\\S+)\"\n| parse regex \"_msg=(?<msg>.+)\"\n| parse regex field=msg \"Keyspace\\s\\w+:(?<bucket>.+)\\.\"\n| parse regex field=msg \"Failed to perform (?<method>\\w+)\"\n| count as count by db_cluster,host,bucket"
}
],
"triggers": [
{
"detectionMethod": "LogsStaticCondition",
"triggerType": "Critical",
"timeRange": "-5m",
"threshold": 1000,
"thresholdType": "GreaterThanOrEqual",
"field": "count"
},
{
"detectionMethod": "LogsStaticCondition",
"triggerType": "ResolvedCritical",
"timeRange": "-5m",
"threshold": 1000,
"thresholdType": "LessThan",
"field": "count"
}
],
"notifications": [],
"isDisabled": true,
"groupNotifications": true,
"playbook": ""
}
]
}
Loading

0 comments on commit be74295

Please sign in to comment.