10from timeit
import default_timer
as timer
11from time
import sleep, time
15from influxdb
import InfluxDBClient
16from requests.exceptions
import ConnectionError
19MEASUREMENT_NAME =
"resp_time"
26 resp = os.system(
"ping -c 5 {} > /dev/null".format(ip))
27 return {
"success": resp == 0}
def test_pgsql_connection(params):
    """Check that a PostgreSQL server accepts a connection and answers a query.

    Args:
        params: mapping with the keys "SERVER", "PORT", "USER", "PASS" and
            "DBNAME" describing the server to probe.

    Returns:
        ``{"success": True, "latency": seconds}`` when connecting and running
        ``SELECT version();`` both succeed, otherwise ``{"success": False}``.
        "latency" is only present on success.
    """
    connection = None
    t0 = timer()
    try:
        connection = psycopg2.connect(
            host=params["SERVER"],
            port=params["PORT"],
            user=params["USER"],
            password=params["PASS"],
            database=params["DBNAME"],
            connect_timeout=5,
        )
        cursor = connection.cursor()
        try:
            cursor.execute("SELECT version();")
            # The row itself is irrelevant; fetching it proves the server
            # actually produced a result.
            cursor.fetchone()
        finally:
            cursor.close()
        # Measured before cleanup so the figure covers connect + query only.
        time_elapsed = timer() - t0
    except Exception as error:
        # psycopg2.Error derives from Exception, so one clause covers driver
        # errors as well as bad or missing parameters.
        print("Error while connecting to PostgreSQL: " + str(error))
        return {"success": False}
    finally:
        # This check runs on every polling cycle; without an explicit close
        # each cycle would leave a server-side session behind.
        if connection is not None:
            connection.close()

    return {"success": True, "latency": time_elapsed}
def test_http(http_addr):
    """Check that a URL answers an HTTP GET with a 2xx status.

    Args:
        http_addr: URL to request.

    Returns:
        ``{"success": True}`` when the final response status is in the
        200-299 range, ``{"success": False}`` for any other status or when
        the request fails (connection error, timeout, invalid URL, ...).
    """
    try:
        result_request = requests.get(http_addr, timeout=5)
    except requests.exceptions.RequestException:
        return {"success": False}

    # A status is outside 200-299 when it is below 200 OR above 299; joining
    # the two comparisons with "and" can never be true and would report
    # every 4xx/5xx answer as healthy.
    if result_request.status_code < 200 or result_request.status_code > 299:
        return {"success": False}
    return {"success": True}
def send_email(to, short_msg, long_msg=""):
    """Send a notification e-mail; failures are reported and then ignored.

    The SMTP server, port and credentials come from the module-level
    ``config["MAIL"]`` section, and the subject is prefixed with the
    module-level ``email_subj_prefix``.

    Args:
        to: comma-separated recipient addresses.
        short_msg: text appended to the subject prefix.
        long_msg: message body (may be empty).

    Returns:
        None. Any error is printed and swallowed so that a mail problem
        never interrupts the caller.
    """
    # Imported locally so the function does not depend on a new file-level
    # import.
    from email.message import EmailMessage

    try:
        mail_cfg = config["MAIL"]
        # Tolerate "a@x, b@y" style lists and stray commas.
        recipients = [addr.strip() for addr in to.split(",") if addr.strip()]

        subj = email_subj_prefix + ": " + short_msg
        # Header values must stay on one line.
        subj = subj.replace("\r", " ").replace("\n", " ")

        # EmailMessage encodes non-ASCII subjects/bodies (e.g. tracebacks).
        # Passing a plain str to SMTP.sendmail() encodes it as ASCII and
        # raises on any other character, which would drop the alert.
        message = EmailMessage()
        message["From"] = mail_cfg["USER"]
        message["To"] = ", ".join(recipients)
        message["Subject"] = subj
        message.set_content(long_msg)

        # The context manager sends QUIT and closes the socket even when
        # login or delivery raises.
        with smtplib.SMTP(mail_cfg["SERVER"], mail_cfg["PORT"]) as mailserver:
            # NOTE(review): the connection is upgraded to TLS only when the
            # server advertises STARTTLS, so credentials are not sent in
            # clear text where that can be avoided - confirm this matches
            # the deployment's mail server.
            mailserver.ehlo_or_helo_if_needed()
            if mailserver.has_extn("starttls"):
                mailserver.starttls()
            mailserver.login(mail_cfg["USER"], mail_cfg["PASS"])
            problems = mailserver.send_message(
                message, from_addr=mail_cfg["USER"], to_addrs=recipients)

        # send_message() returns the recipients the server refused, if any.
        if problems:
            print("Email refused for some recipients: " + str(problems))
    except Exception as e:
        # Deliberately best-effort: alerting must never stop the monitor.
        print("Error sending email - ignoring. Details: " + str(e))
def get_notify(params_dict):
    """Return the notification setting that applies to ``params_dict``.

    A "NOTIFY" entry in ``params_dict`` takes precedence; when it is absent
    the "NOTIFY" entry of the module-level ``config`` is returned instead.
    """
    if "NOTIFY" not in params_dict:
        # No per-item override: fall back to the global setting.
        return config["NOTIFY"]
    return params_dict["NOTIFY"]
103 global email_subj_prefix
105 argparser = argparse.ArgumentParser()
106 argparser.add_argument(
"--config", help=
"Configuration file")
107 args = argparser.parse_args()
108 if args.config
is None:
109 print(
"You must provide configuration file using --config")
111 print(
"Reading --config file: " + args.config)
114 with open(args.config)
as conf_file:
115 config = json.loads(conf_file.read())
116 except Exception
as e:
117 print(
"Error reading config file: " + str(e))
121 email_subj_prefix = config[
"NAME"]
if "NAME" in config
else os.path.splitext(os.path.basename(args.config))[0]
123 print(
"Starting " + email_subj_prefix)
124 send_email(config[
"LOG"],
"Monitoring script started")
130 influxdb_prev_success =
True
131 influxdb_initialized =
False
133 for server
in config[
"PING"]:
134 ping_failed[server] = 0
135 for server, server_params
in config[
"DATABASE"].items():
136 if server_params[
"DBMS"] ==
"PGSQL":
137 pgsql_failed[server] = 0
139 print(
"Unsupported DBMS type: " + server_params[
"DBMS"])
140 for server
in config[
"WEB"]:
141 http_failed[server] = 0
144 if "OUTPUT" in config
and config[
"OUTPUT"][
"DBMS"].upper() ==
"INFLUXDB":
145 if "USER" in config[
"OUTPUT"]
and "PASS" in config[
"OUTPUT"]:
146 iclient = InfluxDBClient(host=config[
"OUTPUT"][
"SERVER"], port=config[
"OUTPUT"][
"PORT"],
147 username=config[
"OUTPUT"][
"USER"], password=config[
"OUTPUT"][
"PASS"])
149 iclient = InfluxDBClient(host=config[
"OUTPUT"][
"SERVER"], port=config[
"OUTPUT"][
"PORT"])
151 print(
"No configuration for InfluxDB output is found.")
155 for test_name, test_params
in config[
"PING"].items():
156 result_ping = test_ping(test_params[
"HOST"])
157 print(
"PING " + test_name +
": " + str(result_ping))
158 success = result_ping[
"success"]
159 if not success
or ping_failed[test_name] > 0:
161 if ping_failed[test_name] > allow_failed_check:
162 send_email(get_notify(test_params), test_name +
" - PING state changed to UP")
163 ping_failed[test_name] = 0
165 if ping_failed[test_name] == allow_failed_check:
166 send_email(get_notify(test_params), test_name +
" - PING state changed to *** DOWN ***")
167 ping_failed[test_name] += 1
170 for test_name, test_params
in config[
"DATABASE"].items():
171 if test_params[
"DBMS"] !=
"PGSQL":
172 print(
"Unsupported DBMS type " + test_params[
"DBMS"])
174 result_pgsql = test_pgsql_connection(test_params)
175 print(
"PGSQL " + test_name +
": " + str(result_pgsql))
176 success = result_pgsql[
"success"]
177 if not success
or pgsql_failed[test_name] > 0:
179 if pgsql_failed[test_name] > allow_failed_check:
180 send_email(get_notify(test_params), test_name +
" - PGSQL state changed to UP")
181 pgsql_failed[test_name] = 0
183 if pgsql_failed[test_name] == allow_failed_check:
184 send_email(get_notify(test_params), test_name +
" - PGSQL state changed to *** DOWN ***")
185 pgsql_failed[test_name] += 1
187 if "OUTPUT" in config
and config[
"OUTPUT"][
"DBMS"].upper() ==
"INFLUXDB":
189 data = [
"{measurement},test_name={test_name} resptime={resptime} {timestamp}"
190 .format(measurement=MEASUREMENT_NAME,
192 resptime=result_pgsql.get(
"latency")
or 0,
193 timestamp=int(time() * 1000))]
195 if not influxdb_initialized:
197 iclient.create_database(config[
"OUTPUT"][
"DBNAME"])
198 influxdb_initialized =
True
199 iclient.write_points(data, database=config[
"OUTPUT"][
"DBNAME"], time_precision=
'ms',
200 batch_size=10000, protocol=
'line')
201 if influxdb_prev_success ==
False:
202 influxdb_prev_success =
True
203 send_email(get_notify(config[
"OUTPUT"]),
"InfluxDB is reachable again.")
204 except (ConnectionRefusedError, ConnectionError)
as err:
205 print(
"Error writing to InfluxDB - ignoring...")
206 if influxdb_prev_success ==
True:
207 influxdb_prev_success =
False
208 send_email(get_notify(config[
"OUTPUT"]),
"InfluxDB is unreachable!", str(err))
210 print(
"Skip writing to InfluxDB due to no configuration.")
# Probe every configured website and e-mail its contacts on state changes.
for test_name, test_params in config["WEB"].items():
    result_http = test_http(test_params["HTTP"])
    print("Checking website for " + test_name + " (" + test_params["HTTP"]
          + "): " + str(result_http))
    success = result_http["success"]
    # Nothing to do while the site is up and no failure is being tracked.
    if not success or http_failed[test_name] > 0:
        if success:
            # Recovery is announced only if the outage was announced, i.e.
            # the failure counter had gone past the tolerated number.
            if http_failed[test_name] > allow_failed_check:
                # Recipients come from this test's own settings (falling
                # back to the global list), exactly as for the PING and
                # PGSQL checks. Using config["OUTPUT"] here ignored per-site
                # NOTIFY overrides and raised KeyError when the optional
                # "OUTPUT" section was missing.
                send_email(get_notify(test_params),
                           test_name + " - Web Interface state changed to UP")
            http_failed[test_name] = 0
        else:
            # Alert once, on the check that reaches the tolerated count.
            if http_failed[test_name] == allow_failed_check:
                send_email(get_notify(test_params),
                           test_name + " - Web Interface state changed to *** DOWN ***")
            http_failed[test_name] += 1
227 sleep(config[
"INTERVAL_SEC"])
229 except Exception
as e:
230 print(
"An exception has been occured with the following traceback info: ")
231 print(traceback.format_exc())
232 send_email(config[
"LOG"],
"Monitoring script terminated!", traceback.format_exc())
235if __name__ ==
"__main__":