From f0802c7b347c5f470ae1c3ef0166ff8df3120723 Mon Sep 17 00:00:00 2001 From: boukandouramhamed Date: Fri, 30 Aug 2024 15:51:58 +0100 Subject: [PATCH] Add a python script parser for geoserver audit logs --- utils/README.md | 150 ++++++++++++++++++++ utils/geoserver_audit_log_parser.py | 206 ++++++++++++++++++++++++++++ 2 files changed, 356 insertions(+) create mode 100644 utils/README.md create mode 100755 utils/geoserver_audit_log_parser.py diff --git a/utils/README.md b/utils/README.md new file mode 100644 index 0000000..5a78abf --- /dev/null +++ b/utils/README.md @@ -0,0 +1,150 @@ +This script parses GeoServer audit log files. + +### DIRECTORY + +The script can list logs from one or multiple (comma-separated) directories using the `--directory` argument or `-d` + +``` +tomcat@wms-geoserver-slave-set-0:/var/geoserver/audits$ ./parser.py --directory "geoserver-wms-geoserver-slave-set-0-Slave/, geoserver-wms-geoserver-slave-set-1-Slave/" +Level Starttime Remoteaddr Operation Errormessage +---------------------------------------------------------------------------------------------------------------------------------------------- +Parsing file: geoserver-wms-geoserver-slave-set-0-Slave/geoserver_audit_20240830_190073.log +info 2024-08-30T14:10:07.182Z 85.131.101.113 GetCapabilities +info 2024-08-30T14:10:06.029Z 212.128.98.139 dispatch +info 2024-08-30T14:10:05.804Z 176.116.124.187 dispatch +info 2024-08-30T14:10:04.059Z 10.42.20.84 GetMap +info 2024-08-30T14:10:05.663Z 10.12.81.202 +info 2024-08-30T14:10:05.663Z 10.12.81.202 +info 2024-08-30T14:10:04.503Z 10.42.20.84 GetMap +info 2024-08-30T14:10:04.498Z 10.42.20.84 GetMap +info 2024-08-30T14:10:04.5Z 10.42.20.84 GetMap +``` + +### SINCE + +The script can also list logs from the last minutes/hours/days using the `--since` argument or `-s` followed by a number and a time unit (m: for minutes, h: for hours, d: for days).
+ +``` +$ ./parser.py --directory geoserver-wms-geoserver-slave-set-6-Slave --since 1m +Level Starttime Remoteaddr Operation Errormessage +---------------------------------------------------------------------------------------------------------------------------------------------- +Parsing file: geoserver-wms-geoserver-slave-set-6-Slave/geoserver_audit_20240830_185008.log +info 2024-08-30T14:42:59.337Z 197.253.114.164 dispatch +info 2024-08-30T14:42:59.208Z 3.29.54.48 dispatch +info 2024-08-30T14:42:59.048Z 80.249.72.62 dispatch +info 2024-08-30T14:42:59.044Z 80.249.72.62 dispatch +info 2024-08-30T14:42:58.341Z 46.1.136.47 GetFeatureInfo +info 2024-08-30T14:42:58.08Z 212.35.78.65 dispatch +info 2024-08-30T14:42:57.455Z 197.214.13.151 dispatch +``` + +### LIMIT + +It is also possible to use the `--limit` or `-l` argument to limit the output to a specific number of lines. + +``` +$ ./parser.py --directory geoserver-wms-geoserver-slave-set-6-Slave --limit 4 +Level Starttime Remoteaddr Operation Errormessage +---------------------------------------------------------------------------------------------------------------------------------------------- +Parsing file: geoserver-wms-geoserver-slave-set-6-Slave/geoserver_audit_20240830_185008.log +info 2024-08-30T14:44:00.021Z 93.46.165.165 GetMap +info 2024-08-30T14:44:00.161Z 193.137.20.13 dispatch +info 2024-08-30T14:43:59.933Z 93.46.165.165 GetMap +info 2024-08-30T14:43:59.745Z 93.46.165.165 GetMap +``` + +### ERRORS ONLY + +The `--errors-only` or `-e` flag can be used to print errors only.
+ +``` +$ ./parser.py --directory geoserver-wms-geoserver-slave-set-0-Slave --since 10m --limit 10 --errors-only +Level Starttime Remoteaddr Operation Errormessage +---------------------------------------------------------------------------------------------------------------------------------------------- +Parsing file: geoserver-wms-geoserver-slave-set-0-Slave/geoserver_audit_20240830_190080.log +error 2024-08-30T14:46:07.195Z 10.42.32.221 Invalid date: 2024-8-30T14:15:0:00.000Z +error 2024-08-30T14:46:07.097Z 10.42.32.221 Invalid date: 2024-8-30T14:15:0:00.000Z +error 2024-08-30T14:46:06.599Z 10.42.32.221 Invalid date: 2024-8-30T14:15:0:00.000Z +error 2024-08-30T14:46:06.497Z 10.42.32.221 Invalid date: 2024-8-30T14:15:0:00.000Z +error 2024-08-30T14:46:06.402Z 10.42.32.221 Invalid date: 2024-8-30T14:15:0:00.000Z +error 2024-08-30T14:46:06.401Z 10.42.32.221 Invalid date: 2024-8-30T14:15:0:00.000Z +error 2024-08-30T14:46:06.401Z 10.42.32.221 Invalid date: 2024-8-30T14:15:0:00.000Z +``` + +### FIELDS + +By default, the script returns five (5) default fields: the log level, timestamp, source IP, type of request and error message. However, additional fields can be added using the `--fields` or `-f` flag.
+ +``` +$ ./parser.py --directory geoserver-wms-geoserver-slave-set-0-Slave --since 10m --limit 10 --fields path,layer +Level Starttime Remoteaddr Operation Errormessage Path Layer +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +Parsing file: geoserver-wms-geoserver-slave-set-0-Slave/geoserver_audit_20240830_190080.log +info 2024-08-30T14:47:23.017Z 85.76.50.190 GetCapabilities /msg_fes/rgb_convection/wms +info 2024-08-30T14:47:22.806Z 77.111.247.58 GetMap /msg_fes/wms msg_fes:rdt +info 2024-08-30T14:47:21.959Z 51.255.198.134 GetMap /ows osmgray:ne_10m_bathymetry, osmgray:simplified_land_polygons, osmgray:land_polygons, osmgray:builtup_area, osmgray:waterareas, osmgray:waterways, msg_fes:rgb_eview, backgrounds:ne_10m_coastline, osmgray:ne_10m_admin_1_states_provinces_lines, osmgray:ne_10m_admin_0_boundary_lines_land +info 2024-08-30T14:47:21.666Z 94.65.88.90 dispatch /gwc/service/wmts +info 2024-08-30T14:47:21.377Z 94.65.88.90 GetMap /msg_fes/wms msg_fes:rgb_airmass +info 2024-08-30T14:47:21.008Z 94.65.88.90 GetMap /msg_fes/wms msg_fes:rgb_airmass +info 2024-08-30T14:47:19.668Z 94.65.88.90 GetMap /msg_fes/wms msg_fes:rgb_airmass +info 2024-08-30T14:47:19.524Z 131.228.2.16 GetCapabilities /msg_fes/rgb_convection/wms +info 2024-08-30T14:47:19.505Z 197.253.114.66 dispatch /gwc/service/wmts +info 2024-08-30T14:47:19.132Z 185.44.145.1 GetMap /msg_fes/wms msg_fes:rdt +``` + +The list of supported fields can be found using the `--help` flag. + +``` +tomcat@wms-geoserver-slave-set-0:/var/geoserver/audits$ ./parser.py --help +usage: parser.py [-h] [-e] [-s SINCE] [-d DIRECTORY] [-f FIELDS] [-t] [--ip IP] [-l LIMIT] + +Parse and highlight errors/warnings in a CSV log file. 
+ +optional arguments: + -h, --help show this help message and exit + -e, --errors-only Only display errors from the log file + -s SINCE, --since SINCE + Filter files modified within this time period (e.g., '15m' for 15 minutes) + -d DIRECTORY, --directory DIRECTORY + Path to the directory containing log files + -f FIELDS, --fields FIELDS + Comma-separated list of additional fields to include in the output Supported fields: 'id', 'internalHost', 'service', 'owsVersion', 'operation', + 'subOperation', 'layer', 'bbox', 'path', 'queryString', 'bodyAsString', 'httpMethod', 'startTime', 'endTime', 'totalTime', 'remoteAddr', 'remoteUser', + 'remoteUserAgent', 'responseStatus', 'responseLength', 'responseContentType', 'error', 'errorMessage' + -t, --tail Continuously monitor the directory for new log files and process them + --ip IP Filter log entries by IP address + -l LIMIT, --limit LIMIT + Limit the number of log lines to print +tomcat@wms-geoserver-slave-set-0:/var/geoserver/audits$ +``` + +### SOURCE IP + +It is also possible to filter logs by IP using the `--ip` argument. + +``` +$ ./parser.py --directory geoserver-wms-geoserver-slave-set-0-Slave --since 10m --limit 4 --ip 77.111.247.58 +Level Starttime Remoteaddr Operation Errormessage +---------------------------------------------------------------------------------------------------------------------------------------------- +Parsing file: geoserver-wms-geoserver-slave-set-0-Slave/geoserver_audit_20240830_190080.log +info 2024-08-30T14:48:00.554Z 77.111.247.58 GetFeatureInfo +info 2024-08-30T14:47:54.282Z 77.111.247.58 GetFeatureInfo +info 2024-08-30T14:47:49.529Z 77.111.247.58 dispatch +info 2024-08-30T14:47:47.256Z 77.111.247.58 GetMap +``` + +### ALL ARGS. + +All arguments can be combined in one execution.
#!/usr/bin/python3
"""Print GeoServer audit log files (CSV) as aligned, filterable columns.

Script path in the repository: utils/geoserver_audit_log_parser.py

Every ``*.log`` file of the selected directories is read as CSV, its
columns are mapped positionally onto ``all_fields``, and the rows are
printed newest-line-first with a leading ``info``/``error`` level.

All arguments can be combined in one execution, for example::

    $ ./parser.py --directory geoserver-wms-geoserver-slave-set-6-Slave \\
          --since 10m --limit 5 --fields path,remoteUser \\
          --errors-only --ip 10.42.32.221
    Level  Starttime  Remoteaddr  Operation  Errormessage  Path  Remoteuser
    ------------------------------------------------------------------------
    Parsing file: geoserver-wms-geoserver-slave-set-6-Slave/geoserver_audit_20240830_185006.log
    error  2024-08-30T14:32:02.695Z  10.42.32.221  Could not find layer msg_rss:rgb_airmass_nrt  /wms  anonymous
    error  2024-08-30T14:31:02.715Z  10.42.32.221  Invalid date: 2024-8-24T18:30:0:00.000Z  /wms  anonymous

(The real output pads every column to the widths in ``column_widths``.)
"""

import argparse
import csv
import glob
import os
from datetime import datetime, timedelta

# Names given, in order, to the CSV columns of every audit log row.
all_fields = [
    'id',
    'internalHost',
    'service',
    'owsVersion',
    'operation',
    'subOperation',
    'layer',
    'bbox',
    'path',
    'queryString',
    'bodyAsString',
    'httpMethod',
    'startTime',
    'endTime',
    'totalTime',
    'remoteAddr',
    'remoteUser',
    'remoteUserAgent',
    'responseStatus',
    'responseLength',
    'responseContentType',
    'error',
    'errorMessage',
]

# Width used for any column without a dedicated entry below.
DEFAULT_WIDTH = 25
# Width of the leading "Level" column.
LEVEL_WIDTH = 7

# Define the width for each field/column
column_widths = dict.fromkeys(all_fields, DEFAULT_WIDTH)
column_widths.update({
    'bbox': 80,
    'queryString': 400,
    'startTime': 35,
    'endTime': 35,
    'errorMessage': 50,
})

# Columns that are always printed, right after the level.
default_fields = ['startTime', 'remoteAddr', 'operation', 'errorMessage']

# Suffix accepted by --since -> matching ``timedelta`` keyword.
SINCE_UNITS = {'m': 'minutes', 'h': 'hours', 'd': 'days'}


def parse_since(value):
    """Convert a ``--since`` value such as ``'15m'`` into a ``timedelta``.

    Args:
        value: A non-negative integer followed by ``m`` (minutes),
            ``h`` (hours) or ``d`` (days).

    Returns:
        The corresponding ``timedelta``.

    Raises:
        argparse.ArgumentTypeError: If the unit is unknown or the amount is
            not a non-negative integer (argparse turns this into a clean
            usage error instead of a traceback).
    """
    unit = SINCE_UNITS.get(value[-1:])
    try:
        if unit is None:
            raise ValueError(value)
        amount = int(value[:-1])
        if amount < 0:
            raise ValueError(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"Unsupported time period format {value!r}. Use a number followed "
            "by 'm' for minutes, 'h' for hours or 'd' for days."
        ) from None
    return timedelta(**{unit: amount})


def format_row(row, fields, level):
    """Return one output line: the level followed by the padded *fields*.

    Columns missing from a short CSV row are ``None`` in *row*; they are
    rendered as empty cells instead of crashing the format call.
    """
    cells = [f"{level:<{LEVEL_WIDTH}}"]
    for field in fields:
        value = row.get(field) or ""
        width = column_widths.get(field, DEFAULT_WIDTH)
        cells.append(f"{value:<{width}}")
    return "".join(cells)


# Parse CSV log file
def parse_log_file(audit_files, errors_only=False, ip=None, limit=None,
                   start_position=0, fields=None):
    """Print the rows of each file in *audit_files*, last line first.

    Args:
        audit_files: Paths of the CSV audit logs, processed in the given order.
        errors_only: When true, only rows whose ``error`` column is ``true``
            (case-insensitive) are printed.
        ip: When set, only rows whose ``remoteAddr`` equals it are printed.
        limit: Stop after this many printed rows; ``None`` or ``0`` means
            no limit.
        start_position: Unused; kept so existing callers keep working.
        fields: Columns to print after the level. Defaults to
            ``default_fields``.

    Returns:
        The number of rows printed, so a caller can share one limit across
        several calls.
    """
    if fields is None:
        fields = default_fields

    lines_printed = 0
    for file_path in audit_files:
        print(f"Parsing file: {file_path}")

        # newline='' is what the csv module expects; undecodable bytes are
        # replaced so that one odd request string cannot abort the whole run.
        with open(file_path, mode='r', newline='', errors='replace') as log_file:
            rows = list(csv.DictReader(log_file, fieldnames=all_fields))

        # The whole file is read first because rows are shown newest first.
        for row in reversed(rows):
            is_error = (row.get('error') or '').lower() == 'true'
            level = 'error' if is_error else 'info'

            # Filter before formatting so skipped rows cost nothing.
            if errors_only and not is_error:
                continue
            if ip and ip != row.get('remoteAddr', ''):
                continue

            print(format_row(row, fields, level))

            lines_printed += 1
            if limit and lines_printed >= limit:
                return lines_printed
    return lines_printed


def get_audit_files(directory, time_limit=None):
    """Return the ``*.log`` files of *directory*, sorted by name descending.

    Args:
        directory: Directory to scan (not recursive).
        time_limit: Optional ``timedelta``. When set (and non-zero), the list
            is cut at the first file whose modification time is older than
            ``now - time_limit``; later (lower-named) files are not examined.
            NOTE(review): this presumes file names sort in the same order as
            their modification times - confirm against the log naming scheme.

    Returns:
        A list of file paths, possibly empty.
    """
    log_files = sorted(glob.glob(os.path.join(directory, '*.log')), reverse=True)
    if not time_limit:
        return log_files

    now = datetime.now()
    audit_files = []
    for file_path in log_files:
        try:
            mod_time = datetime.fromtimestamp(os.path.getmtime(file_path))
        except OSError:
            # The file disappeared between the glob and the stat; skip it.
            continue
        if now - mod_time > time_limit:
            break
        audit_files.append(file_path)
    return audit_files


def split_csv_argument(raw):
    """Split a comma-separated CLI value, dropping blanks and padding."""
    if not raw:
        return []
    return [item.strip() for item in raw.split(',') if item.strip()]


def build_arg_parser():
    """Create the command-line parser of the script."""
    supported = ", ".join(repr(name) for name in all_fields)
    parser = argparse.ArgumentParser(description="Parse and highlight errors/warnings in a CSV log file.")
    parser.add_argument('-e', '--errors-only', action='store_true', help="Only display errors from the log file")
    parser.add_argument('-s', '--since', type=parse_since,
                        help="Filter files modified within this time period (e.g., '15m' for 15 minutes)")
    parser.add_argument('-d', '--directory', help="Path to the directory containing log files")
    parser.add_argument('-f', '--fields',
                        help="Comma-separated list of additional fields to include in the output "
                             f"Supported fields: {supported}")
    # NOTE: --tail is parsed but nothing in this script acts on it yet.
    parser.add_argument('-t', '--tail', action='store_true',
                        help="Continuously monitor the directory for new log files and process them")
    parser.add_argument('--ip', help="Filter log entries by IP address", type=str)
    parser.add_argument('-l', '--limit', help="Limit the number of log lines to print", type=int, required=False)
    return parser


def main(argv=None):
    """Run the command-line interface.

    Args:
        argv: Argument list without the program name; ``None`` means
            ``sys.argv[1:]``.
    """
    parser = build_arg_parser()
    args = parser.parse_args(argv)

    # Additional columns requested by the user, validated against the schema
    # so a typo is reported instead of silently printing an empty column.
    extra_fields = split_csv_argument(args.fields)
    unknown = [name for name in extra_fields if name not in all_fields]
    if unknown:
        parser.error("unsupported field(s): " + ", ".join(unknown))
    fields_to_parse = default_fields + extra_fields

    # Always a list: iterating over the bare string './' would scan '.' and '/'.
    directories = split_csv_argument(args.directory) or ['./']

    # Print header dynamically based on fields
    header = f"{'Level':<{LEVEL_WIDTH}}" + "".join(
        f"{field.capitalize():<{column_widths.get(field, DEFAULT_WIDTH)}}"
        for field in fields_to_parse
    )
    print(header)
    print('-' * len(header))

    # The limit applies to the whole run, not to each directory separately.
    remaining = args.limit
    for directory in directories:
        audit_files = get_audit_files(directory, args.since)
        if not audit_files:
            print("No log files found.")
            continue
        printed = parse_log_file(audit_files, errors_only=args.errors_only,
                                 ip=args.ip, limit=remaining,
                                 fields=fields_to_parse)
        if remaining:
            remaining -= printed
            if remaining <= 0:
                break


if __name__ == "__main__":
    main()