diff --git a/README.md b/README.md index 25ee7c8..0eb4c4e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,38 @@ + +```text + _ +| | +| | ___ __ _ _ __ __ _ _ __ ___ ___ +| |/ _ \ / _` | '_ \ / _` | '__/ __|/ _ \ +| | (_) | (_| | |_) | (_| | | \__ \ __/ +|_|\___/ \__, | .__/ \__,_|_| |___/\___| + __/ | | + |___/|_| +``` + # logparse -Logparse is a shell script that parses Caddy JSON log files and outputs them as formatted text. It supports the Common Log Format and Combined Log Format, usually used by traditional HTTP servers like Apache, however it is easy to customise the output using the --selector option or by using a config file. Logparse uses the powerful jq tool to process the JSON logs. \ No newline at end of file +Logparse is a shell script that parses Caddy JSON log files and outputs them as formatted text. It supports the Common Log Format and Combined Log Format, usually used by traditional HTTP servers like Apache; however, it is easy to customise the output using the --selector option. + +The config file allows changing the default output format, adding additional selectors and adapting logparse to other JSON sources. Logparse uses the powerful `jq` tool to process the JSON files. + +## Requirements + +* Bash 5.1: Logparse uses associative arrays, which are not available in plain POSIX shell scripts. +* jq 1.7: Tested with jq 1.7.1. 
+ +## Usage + +```text +Usage: /usr/bin/logparse [-c | -C | -s "selectors"] [-F <file>] filename +Options: + -c, --common Apache Common Log Format (default) + -C, --combined Apache Combined Log Format + -s, --selector Use a space separated list of selectors + -F, --config-file Use a configuration file + -h, --help Show this help message and exit +``` + +## Examples + +Example usage and descriptions are available on the wiki: https://wiki.tnonline.net/w/Linux/logparse \ No newline at end of file diff --git a/examples/combined-log-format.log b/examples/combined-log-format.log new file mode 100644 index 0000000..d89577c --- /dev/null +++ b/examples/combined-log-format.log @@ -0,0 +1,4 @@ +10.10.10.117 - [2024-05-26 11:42:52] "GET /gentoo/gentoo-distfiles/distfiles/9b/tl-hrlatex.source-2021.tar.xz HTTP/2.0" 200 3768 "null" "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)" +10.10.10.134 - [2024-05-26 11:43:50] "GET /gentoo/gentoo-portage/app-emulation/metadata.xml HTTP/2.0" 200 446 "null" "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)" +10.10.10.132 - [2024-05-26 11:44:00] "GET /gentoo/gentoo-distfiles/distfiles/b6/libvirt-python-9.9.0.tar.gz HTTP/2.0" 200 246357 "null" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/116.0.1938.76 Safari/537.36" +240e:1:1::1234 - [2024-05-26 00:22:43] "GET /res/browse.css HTTP/3.0" 200 1577 "https://mirrors.tnonline.net/gentoo/gentoo-portage/app-dicts/aspell-lt/" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/117.0.5938.60 Safari/537.36" diff --git a/examples/common-log-format.log b/examples/common-log-format.log new file mode 100644 index 0000000..4d3bfe4 --- /dev/null +++ b/examples/common-log-format.log @@ -0,0 +1,4 @@ +10.10.10.117 - [2024-05-26 11:42:52] "GET /gentoo/gentoo-distfiles/distfiles/9b/tl-hrlatex.source-2021.tar.xz HTTP/2.0" 200 3768 +10.10.10.134 - [2024-05-26 11:43:50] 
"GET /gentoo/gentoo-portage/app-emulation/metadata.xml HTTP/2.0" 200 446 +10.10.10.132 - [2024-05-26 11:44:00] "GET /gentoo/gentoo-distfiles/distfiles/b6/libvirt-python-9.9.0.tar.gz HTTP/2.0" 200 246357 +240e:1:1::1234 - [2024-05-26 00:22:43] "GET /res/browse.css HTTP/3.0" 200 1577 diff --git a/examples/json-lines.log b/examples/json-lines.log new file mode 100644 index 0000000..f4c12a6 --- /dev/null +++ b/examples/json-lines.log @@ -0,0 +1,4 @@ +{"level":"info","ts":1716723772.6014807,"logger":"http.log.access.log17","msg":"handled request","request":{"remote_ip":"10.5.1.1","remote_port":"38574","client_ip":"10.10.10.117","proto":"HTTP/2.0","method":"GET","host":"mirrors.tnonline.net:400","uri":"/gentoo/gentoo-distfiles/distfiles/9b/tl-hrlatex.source-2021.tar.xz","headers":{"Accept":["*/*"],"User-Agent":["facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)"],"X-Forwarded-For":["10.10.10.117"],"X-Forwarded-Proto":["https"],"X-Forwarded-Host":["mirrors.tnonline.net"],"Accept-Encoding":["gzip"]}},"bytes_read":0,"user_id":"","duration":0.017557019,"size":3768,"status":200,"resp_headers":{"X-Content-Type-Options":["nosniff"],"X-Frame-Options":["DENY"],"Vary":["Accept-Encoding"],"Content-Length":["3768"],"Strict-Transport-Security":["max-age=31968001; preload"],"Content-Security-Policy":["default-src 'self'"],"Last-Modified":["Thu, 01 Apr 2021 22:01:15 GMT"],"Permissions-Policy":["none"],"Cache-Control":["max-age=2592000, public"],"Content-Type":["application/x-xz"],"Accept-Ranges":["bytes"],"Server":["Caddy"],"Referrer-Policy":["same-origin"],"Etag":["\"cacq4sswpg5c2wo\""]}} +{"level":"info","ts":1716723830.5081363,"logger":"http.log.access.log17","msg":"handled 
request","request":{"remote_ip":"10.5.1.1","remote_port":"38574","client_ip":"10.10.10.134","proto":"HTTP/2.0","method":"GET","host":"mirrors.tnonline.net:400","uri":"/gentoo/gentoo-portage/app-emulation/metadata.xml","headers":{"X-Forwarded-For":["10.10.10.134"],"X-Forwarded-Proto":["https"],"X-Forwarded-Host":["mirrors.tnonline.net"],"User-Agent":["Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)"],"Accept":["*/*"],"Accept-Encoding":["deflate, gzip, br"]}},"bytes_read":0,"user_id":"","duration":0.000724332,"size":446,"status":200,"resp_headers":{"Referrer-Policy":["same-origin"],"Strict-Transport-Security":["max-age=31968001; preload"],"Content-Encoding":["br"],"Server":["Caddy"],"X-Frame-Options":["DENY"],"Vary":["Accept-Encoding"],"Permissions-Policy":["none"],"X-Content-Type-Options":["nosniff"],"Cache-Control":["max-age=62, public"],"Etag":["\"ce73jyctvym8wj-br\""],"Content-Security-Policy":["default-src 'self'"],"Content-Type":["text/xml; charset=utf-8"],"Last-Modified":["Sat, 11 Sep 2021 13:40:11 GMT"]}} +{"level":"info","ts":1716723840.6541977,"logger":"http.log.access.log17","msg":"handled request","request":{"remote_ip":"10.5.1.1","remote_port":"38574","client_ip":"10.10.10.132","proto":"HTTP/2.0","method":"GET","host":"mirrors.tnonline.net:400","uri":"/gentoo/gentoo-distfiles/distfiles/b6/libvirt-python-9.9.0.tar.gz","headers":{"X-Forwarded-Proto":["https"],"X-Forwarded-Host":["mirrors.tnonline.net"],"Accept-Encoding":["gzip, deflate, br"],"User-Agent":["Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/116.0.1938.76 
Safari/537.36"],"Accept":["*/*"],"From":["bingbot(at)microsoft.com"],"X-Forwarded-For":["10.10.10.132"]}},"bytes_read":0,"user_id":"","duration":0.024626918,"size":246357,"status":200,"resp_headers":{"Accept-Ranges":["bytes"],"Content-Length":["246357"],"Permissions-Policy":["none"],"X-Content-Type-Options":["nosniff"],"Vary":["Accept-Encoding"],"X-Frame-Options":["DENY"],"Server":["Caddy"],"Cache-Control":["max-age=2592000, public"],"Last-Modified":["Wed, 01 Nov 2023 09:44:15 GMT"],"Etag":["\"cwndms6wbr405a39\""],"Content-Type":["application/gzip"],"Content-Security-Policy":["default-src 'self'"],"Referrer-Policy":["same-origin"],"Strict-Transport-Security":["max-age=31968001; preload"]}} +{"level":"info","ts":1716682963.4146512,"logger":"http.log.access.log16","msg":"handled request","request":{"remote_ip":"240e:1:1::1234","remote_port":"62769","client_ip":"240e:1:1::1234","proto":"HTTP/3.0","method":"GET","host":"mirrors.tnonline.net","uri":"/res/browse.css","headers":{"Sec-Fetch-Mode":["no-cors"],"Sec-Fetch-Dest":["style"],"Referer":["https://mirrors.tnonline.net/gentoo/gentoo-portage/app-dicts/aspell-lt/"],"Accept-Encoding":["gzip, deflate, br"],"Accept":["text/css,*/*;q=0.1"],"Sec-Fetch-Site":["same-origin"],"User-Agent":["Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/117.0.5938.60 Safari/537.36"]},"tls":{"resumed":false,"version":772,"cipher_suite":4865,"proto":"h3","server_name":"mirrors.tnonline.net"}},"bytes_read":0,"user_id":"","duration":0.000275783,"size":1577,"status":200,"resp_headers":{"Cache-Control":["max-age=2592000, public"],"Vary":["Accept-Encoding"],"Last-Modified":["Sat, 09 Sep 2023 19:56:35 GMT"],"Date":["Sun, 26 May 2024 00:22:43 GMT"],"X-Frame-Options":["DENY"],"Content-Security-Policy":["default-src 'self'"],"Strict-Transport-Security":["max-age=31968001; 
preload"],"X-Content-Type-Options":["nosniff"],"Etag":["\"cvengqvj61vk17t\""],"Accept-Ranges":["bytes"],"Server":["Caddy"],"Permissions-Policy":["none"],"Content-Type":["text/css; charset=utf-8"],"Referrer-Policy":["same-origin"],"Content-Encoding":["br"]}} \ No newline at end of file diff --git a/logparse b/logparse new file mode 100755 index 0000000..8e19ba4 --- /dev/null +++ b/logparse @@ -0,0 +1,335 @@ +#!/bin/bash + +# shellcheck shell=bash + +# Caddy webserver JSON log parser +# version 0.1.0 +# +# This script reads a Caddy logfile in JSON format and +# outputs it in Apache Common or Combined Log Format, or as a custom list of selectors. +# +# SPDX-License-Identifier: GPL-3.0-or-later +# Copyright 2024 Forza + + +# Default output format (one of: common, combined, custom) +format="common" +# Default config file (empty: no config file is loaded) +config_file="" + +# JSON selectors available in Caddy JSON logs (selector name -> jq filter expression) +# shellcheck disable=SC2016 +declare -A selectors=( + [ts]='.ts' + [datetime]='.ts | strftime("%Y-%m-%d %H:%M:%S")' + [datetime_l]='.ts | strflocaltime("%Y-%m-%d %H:%M:%S %Z")' + [datetime_ms]='(.ts | tostring | split(".") | .[1][:3]) as $ms | .ts | strflocaltime("%Y-%m-%d %H:%M:%S.") + $ms + strflocaltime(" %Z")' + [datetime_iso]='.ts | todateiso8601' + [client_ip]='.request.client_ip' + [remote_ip]='.request.remote_ip' + [remote_port]='.request.remote_port' + [proto]='.request.proto' + [method]='.request.method' + [host]='.request.host' + [uri]='.request.uri' + [user_agent]='.request.headers["User-Agent"][0]' + [referer]='.request.headers.Referer[0]' + [accept]='.request.headers.Accept[0]' + [accept_encoding]='.request.headers["Accept-Encoding"][0]' + [tls_resumed]='.request.tls.resumed' + [tls_version]='.request.tls.version as $version | + if $version == 769 then "TLS 1.0" + elif $version == 770 then "TLS 1.1" + elif $version == 771 then "TLS 1.2" + elif $version == 772 then "TLS 1.3" + else $version end' + [tls_cipher_suite]='.request.tls.cipher_suite as $cs | + if $cs == 5 then "TLS_RSA_WITH_RC4_128_SHA" + elif $cs == 10 then "TLS_RSA_WITH_3DES_EDE_CBC_SHA" + 
elif $cs == 47 then "TLS_RSA_WITH_AES_128_CBC_SHA" + elif $cs == 53 then "TLS_RSA_WITH_AES_256_CBC_SHA" + elif $cs == 60 then "TLS_RSA_WITH_AES_128_CBC_SHA256" + elif $cs == 156 then "TLS_RSA_WITH_AES_128_GCM_SHA256" + elif $cs == 157 then "TLS_RSA_WITH_AES_256_GCM_SHA384" + elif $cs == 49159 then "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA" + elif $cs == 49169 then "TLS_ECDHE_RSA_WITH_RC4_128_SHA" + elif $cs == 49170 then "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA" + elif $cs == 49171 then "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA" + elif $cs == 49172 then "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA" + elif $cs == 49161 then "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA" + elif $cs == 49162 then "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA" + elif $cs == 49191 then "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256" + elif $cs == 49199 then "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256" + elif $cs == 49187 then "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256" + elif $cs == 49195 then "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256" + elif $cs == 49200 then "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384" + elif $cs == 49196 then "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384" + elif $cs == 52392 then "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256" + elif $cs == 52393 then "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256" + elif $cs == 4865 then "TLS_AES_128_GCM_SHA256" + elif $cs == 4866 then "TLS_AES_256_GCM_SHA384" + elif $cs == 4867 then "TLS_CHACHA20_POLY1305_SHA256" + elif $cs == 22016 then "TLS_FALLBACK_SCSV" + elif $cs == 52392 then "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305" + elif $cs == 52393 then "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305" else $cs end' + [tls_proto]='.request.tls.proto' + [tls_server_name]='.request.tls.server_name' + [bytes_read]='.bytes_read' + [user_id]='.user_id' + [duration]='.duration' + [size]='.size' + [status]='.status' + [rh_content_length]='.resp_headers["Content-Length"][0]' # rh_*: first value of the named response header + [rh_content_type]='.resp_headers["Content-Type"][0]' + [rh_server]='.resp_headers.Server[0]' + 
[rh_x_frame_options]='.resp_headers["X-Frame-Options"][0]' + [rh_last_modified]='.resp_headers["Last-Modified"][0]' + [rh_alt_svc]='.resp_headers["Alt-Svc"][0]' + [rh_vary]='.resp_headers.Vary[0]' + [rh_etag]='.resp_headers.Etag[0]' + [rh_x_content_type_options]='.resp_headers["X-Content-Type-Options"][0]' + [rh_referrer_policy]='.resp_headers["Referrer-Policy"][0]' + [rh_cache_control]='.resp_headers["Cache-Control"][0]' + [rh_accept_ranges]='.resp_headers["Accept-Ranges"][0]' + [rh_content_security_policy]='.resp_headers["Content-Security-Policy"][0]' + [rh_strict_transport_security]='.resp_headers["Strict-Transport-Security"][0]' + [rh_permissions_policy]='.resp_headers["Permissions-Policy"][0]' +) + +# Literal placeholders that can be mixed with selectors in a format list (name -> text inserted into the output) +declare -A placeholders=( + [_]=' ' + [space]=' ' + [tab]='\t' + [q]='\"' + [quote]='\"' + [squote]="'" + [i]='|' + [pipe]='|' + [lsqb]='[' + [rsqb]=']' + [lbrace]='(' + [rbrace]=')' + [lcurly]='{' + [rcurly]='}' + [colon]=':' + [semicolon]=';' + [comma]=',' + [dot]='.' + [slash]='/' + [backslash]='\\' + [hyphen]='-' + [underscore]='_' + [plus]='+' + [equals]='=' + [ampersand]='&' + [percent]='%' + [dollar]='$' + [exclamation]='!' +) + +# Print the command line syntax and option summary to stdout +show_help() { + cat <<END +Usage: $0 [-c | -C | -s "selectors"] [-F <file>] filename + +Options: + -c, --common Apache Common Log Format (default) + -C, --combined Apache Combined Log Format + -s, --selector Use a space separated list of selectors + -F, --config-file Use a configuration file + -h, --help Show this help message and exit + +END +} + +die() { # Print "$1" (default "Died") to stderr and exit 1; with DEBUG=1 the caller's location is prepended + local message="${1:-Died}" + if [ "${DEBUG:-0}" -eq 1 ]; then + echo "${BASH_SOURCE[1]}: line ${BASH_LINENO[0]}: ${FUNCNAME[1]}: $message." 
>&2 + else + echo "$message" >&2 + fi + exit 1 +} + +debug() { + if [ "$DEBUG" -eq 1 ]; then + if [ -n "$2" ]; then + eval "$2" + else + echo "$1" + fi + fi +} +# Function to output logs in a custom format +log_format() { + # Generate the jq filter string dynamically + local filter_parts="" + local output_parts="" + for var in "$@"; do + debug "log_format: var: $var" + if [ -n "${placeholders[$var]}" ]; then + debug "log_format: placeholder: $var" + output_parts+="\(\$${var})" + filter_parts+=" + | ( \"${placeholders[$var]}\" ) as \$$var" + elif [ -n "${selectors[$var]}" ]; then + debug "log_format: selector: $var" + # 'jq' needs to test if the selector exists and if it is empty + output_parts+="\(\$${var})" + filter_parts+=" + | ( if ${selectors[$var]} == \"\" then \"-\" + else ${selectors[$var]} + end ) as \$$var" + fi + done + + if [ $DEBUG -eq 1 ]; then + echo jq -r ". ${filter_parts} | \"${output_parts}\"" "${file}" + else + jq -r ". ${filter_parts} | \"${output_parts}\"" "${file}" + fi +} + +###################### +# Start main section # +###################### + +# Enable enable debug output +DEBUG=$((DEBUG == 1 ? 1 : 0)) + +debug "main: command_line options: $#" +# Parse command line options +while [ "$#" -gt 0 ]; do + case $1 in + -c|--common) + format="common" + shift + ;; + -C|--combined) + format="combined" + shift + ;; + -F|--config-file) + config_file="$2" + if [ -z "${config_file}" ]; then + die "Error: No configuration file specified." + fi + use_config_file=1 + shift 2 + ;; + -s|--selector) + use_selectors_c="$2" + if [ -z "${use_selectors_c}" ]; then + die "Error: No output selectors specified." + fi + format="custom" + shift 2 + ;; + -h|--help) + show_help + exit 0 + ;; + *) + file="$1" + shift + ;; + esac +done + +# Check if input files are readable +if [ -z "${file}" ]; then + show_help + die "Error: No input file provided." +elif [ ! -r "${file}" ]; then + die "Error: Input file '${file}' is not readable." +fi + +if [ ! 
-z ${use_config_file+x} ]; then + if [ -r "${config_file}" ]; then + debug "load_config: config_file: '$config_file'" + . "${config_file}" || die "Error: Could not load config file" + debug "load_config: format: $format" + debug "load_config: use_selectors: $use_selectors" + debug - 'for a in "${!placeholders[@]}"; do echo "load_config: placeholders: ${a}: \"${placeholders[$a]}\" " ; done' + debug - 'for a in "${!selectors[@]}"; do echo "load_config: selectors: $a: \"${selectors[$a]}\" " ; done' + else + die "Error: Could not read config file '${config_file}'" + fi +fi + +if [ -n "${use_selectors_c}" ]; then + use_selectors="${use_selectors_c}" + debug "main: use_selectors override: $use_selectors" +fi + +# Process JSON file using the chosen format +debug "main: format: $format" +case "$format" in + common) + # Common Log format + log_format \ + client_ip \ + space user_id \ + space lsqb datetime rsqb \ + space quote method \ + space uri space proto quote \ + space status space size + ;; + combined) + # Combined Log format + # '_' can be used instead of 'space' + # 'q' can be used instead of 'quote' + log_format \ + client_ip \ + _ user_id \ + _ lsqb datetime rsqb \ + _ q method _ uri _ proto q \ + _ status \ + _ size \ + _ q referer q \ + _ q user_agent q + ;; + custom) + debug "main: calling: log_format $use_selectors" + # Only use specified selectors + # shellcheck disable=SC2086 + log_format $use_selectors + ;; +esac + + + +#### +# A note on datetime format in 'jq'. +# 1. there is no subsecond support +# 2. timezone support is currently not available in +# todateiso8601 builtin jq datetime function. +# 3. strftime function can be used, but is platform +# dependant and some formating styles may not be +# implented or may not properly. +# 4. strflocaltime converts UTC source to local timezone. +# 5. jq assumes epoch time to be UTC. 
+# https://jqlang.github.io/jq/manual/#dates + +#### +# Apache's common and combined log formats are widely supported +# in log readers and easily read by humans. + +# https://httpd.apache.org/docs/current/logs.html +# +# Apache Common Log format: +# "%h %l %u %t \"%r\" %>s %b" + +# Apache Combined Log format: +# "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" +# +# Where: +# %h is the remote host (client IP) +# %l is the remote logname (not used, so we'll use -) +# %u is the authenticated user +# %t is the time the request was received +# %r is the request line from the client ("method uri proto") +# %>s is the status code +# %b is the size of the object returned to the client