-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter.ml
175 lines (158 loc) · 6.16 KB
/
filter.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
(* [arrayfind f a] returns [Some i] for the smallest index [i] such that
   [f a.(i)] is true, or [None] when no element of [a] satisfies [f]
   (which includes the empty array).  [f] is not applied to any element
   after the first one that satisfies it. *)
let arrayfind f a =
  let l = Array.length a in
  (* Indices are compared with the ordinary integer ordering rather than
     with physical inequality, which is only meant for identity tests. *)
  let rec scan i =
    if i >= l then None
    else if f a.(i) then Some i
    else scan (i + 1)
  in
  scan 0
(* [foldfor f initial n] folds [f] over the indices 0 .. n-1 in increasing
   order, i.e. it returns [f (... (f (f initial 0) 1) ...) (n-1)].
   When [n] is zero or negative no index is visited and [initial] is
   returned unchanged. *)
let foldfor f initial n =
  (* Stop on [i >= n] rather than [i = n]: counting up from 0 never hits a
     negative [n], so an equality test would keep looping. *)
  let rec next acc i = if i >= n then acc else next (f acc i) (i + 1) in
  next initial 0
(* [listfind f l] applies [f] to the elements of [l] starting from the
   front and returns the first result that is not [None].  It returns
   [None] if [f] yields [None] for every element or if [l] is empty.
   Elements after the first hit are not examined. *)
let rec listfind f = function
  | [] -> None
  | head :: rest ->
    begin match f head with
      | Some _ as hit -> hit
      | None -> listfind f rest
    end
;;
(* We retain a buffer of recent messages, and note whether they matched
one of the "summarise" entries so that later matches to the same
entry get included. *)
type msgrecord =
  | Normal
    (* The buffered message matched no summarise pattern; later messages
       are counted against it only when their text is identical. *)
  | Summarise of Pcre.regexp * string array
    (* The buffered message matched a summarise pattern.  Carries that
       pattern and the substrings it captured from the message, so that
       later messages yielding the same captures are counted against it. *)
(* Put a message into the buffer (or increment the count against a
matching message in the buffer). If the buffer is full, we return
the oldest message. *)
(* [pushmsg summarise (outbuffer, nextindex) (time, host, message)] records
   one message in the ring buffer.

   Each slot holds (first time, host, message, record, count, last time);
   a slot whose first time is the empty string is treated as unused.

   - If some slot already describes the same event (same host, and either
     identical text for a Normal slot or identical captures under the
     slot's pattern for a Summarise slot), its count is incremented, its
     last time becomes [time], and [None] is returned.
   - Otherwise the slot at [!nextindex] is overwritten with a fresh entry
     of count 1, [nextindex] advances cyclically, and the previous content
     of that slot is returned as [Some old] unless the slot was unused.

   [summarise] is the list of patterns tried, in order, to decide whether
   the new entry is a Summarise or a Normal one. *)
let pushmsg summarise (outbuffer, nextindex) (time,host,message) =
  (* Does this buffered entry describe the same event as the new message? *)
  let same_event (_, bhost, bmessage, brecord, _, _) =
    host = bhost &&
    (match brecord with
     | Normal -> message = bmessage
     | Summarise (rex, bdetails) ->
       (* A message the pattern does not match cannot be a duplicate. *)
       (try Pcre.extract ~rex ~full_match:false message = bdetails
        with Not_found -> false))
  in
  match arrayfind same_event outbuffer with
  | Some slot ->
    let (first, h, m, r, count, _) = outbuffer.(slot) in
    outbuffer.(slot) <- (first, h, m, r, count + 1, time);
    None
  | None ->
    let slot = !nextindex in
    (* Remember what is about to be overwritten so it can be handed back. *)
    let evicted = outbuffer.(slot) in
    let classify rex =
      try Some (Summarise (rex, Pcre.extract ~rex ~full_match:false message))
      with Not_found -> None
    in
    let record =
      match listfind classify summarise with
      | Some summary -> summary
      | None -> Normal
    in
    outbuffer.(slot) <- (time, host, message, record, 1, time);
    nextindex := (slot + 1) mod Array.length outbuffer;
    let (stamp, _, _, _, _, _) = evicted in
    if stamp = "" then None else Some evicted
(* [print_line (t, h, s, record, count, last)] writes one buffered entry to
   standard output: the line "t h s", followed, when [count] is greater
   than 1, by a note giving the repeat count and the time [last] of the
   final occurrence.  An entry whose time [t] is the empty string (an
   unused buffer slot) produces no output at all. *)
let print_line (t,h,s,record,count,last) =
  if t = "" then ()
  else begin
    (* The whole body sits inside this branch.  Without the explicit
       begin/end the sequence operator would end the conditional after the
       first statement and the repeat note would escape the guard. *)
    print_endline (t ^ " " ^ h ^ " " ^ s);
    if count > 1 then
      match record with
      | Normal ->
        Printf.printf "%s repeated %d times, ending at %s.\n" t count last
      | Summarise _ ->
        (* The first occurrence is the line itself, hence count - 1. *)
        Printf.printf "%s and %d similar %s, ending at %s.\n"
          t (count-1) (if count > 2 then "entries" else "entry") last
  end
(* Number of malformed input lines seen so far. *)
let malformed = ref 0
(* Number of malformed lines that are reported individually. *)
let maxmalformed = 50
(* [print_malformed s] counts one more malformed line and reports it on
   standard output.  The first [maxmalformed] lines are echoed with a
   prefix; the next one triggers a single notice that the rest will be
   suppressed; every later line is counted but prints nothing. *)
let print_malformed s =
  malformed := !malformed + 1;
  if !malformed <= maxmalformed then begin
    (* Both prints belong to this branch.  Written as
       "else print_string ...; print_endline s" the second call falls
       outside the conditional and the line would be echoed even after
       the suppression notice. *)
    print_string "Malformed log message: ";
    print_endline s
  end
  else if !malformed = maxmalformed + 1 then
    print_endline "* Too many malformed lines, suppressing rest."
(* [checkignore ignorable service m] is true when message [m] matches at
   least one pattern stored in the table [ignorable] under the key
   [service], or failing that, at least one pattern stored under the
   empty-string key.  The second group is only consulted when the first
   produced no match. *)
let checkignore ignorable service m =
  let hits key =
    List.exists (fun rex -> Pcre.pmatch ~rex m) (Hashtbl.find_all ignorable key)
  in
  hits service || hits ""
(* Separate time, host and message, and find out the service if there is one. *)
(* Compiled pattern for one whole input line.  Capture groups:
     1 - the first three space-separated fields of the line (filter uses
         this as the time);
     2 - the following field (filter uses this as the host);
     3 - the remainder of the line up to its last non-space character,
         trailing whitespace excluded (filter uses this as the message);
     4 - optional and nested at the start of group 3: a run of characters
         other than space, colon and opening square bracket, which may be
         followed by a decimal number in square brackets and must be
         followed by a colon (filter uses this as the service). *)
let timehost_r = Pcre.regexp
"^(\\S+ +\\S+ +\\S+) +(\\S+) +((?:([^ :[]+)(?:\\[[0-9]+\\])?:)?.*\\S)\\s*$";;
(* [filter ignorable outbuffer summaries s] handles one raw input line [s].
   The line is split with [timehost_r] into time, host, message and
   service.  Messages accepted by [checkignore] are dropped; any other
   message is pushed into the buffer with [pushmsg], and if that hands back
   an older entry, the entry is printed.  A line that [timehost_r] does not
   match is passed to [print_malformed]. *)
let filter ignorable outbuffer summaries s =
  try
    let fields = Pcre.extract ~rex:timehost_r s in
    let time = fields.(1)
    and host = fields.(2)
    and message = fields.(3)
    and service = fields.(4) in
    if not (checkignore ignorable service message) then
      (match pushmsg summaries outbuffer (time, host, message) with
       | Some evicted -> print_line evicted
       | None -> ())
  with Not_found -> print_malformed s
;;
(* Note the list reversal to ensure the regexps are in the same order as the
file. *)
(* [readregexps filename] folds over [filename] with [File.foldfile],
   compiling each line as a regular expression, and returns the compiled
   patterns in the order they were folded.  A line that is rejected as a
   bad pattern is reported on standard error and left out. *)
let readregexps filename =
  (* Compile one line, or report it and give back nothing. *)
  let compile line =
    try Some (Pcre.regexp line)
    with Pcre.Error (Pcre.BadPattern (msg, pos)) ->
      Printf.eprintf "Bad regexp: %s at position %d\n %s\n%!" msg pos line;
      None
  in
  let collect compiled line =
    match compile line with
    | Some rex -> rex :: compiled
    | None -> compiled
  in
  (* Patterns are consed onto the front, so undo that at the end. *)
  List.rev (File.foldfile filename collect [])
;;
(* It might be more efficient if we stored (and updated) a list in the hash
table, rather than using Hashtbl.find_all in checkignore above. *)
(* Pattern that mkignore applies to the text of each ignore rule to find
   the service the rule is anchored to.  It matches rule text that starts
   with a literal caret, then an optional captured name made of characters
   other than space, backslash, dot, opening square bracket, vertical bar,
   opening parenthesis, question mark, star, plus, opening brace and colon,
   then optionally the literal text \[[0-9]+\] and finally a colon.
   The captured name is the only capture group. *)
let grabservice = Pcre.regexp
"^\\^([^ \\\\.\\[|(?*+{:]+)?(?:\\\\\\[\\[0-9\\]\\+\\\\\\])?:";;
(* [mkignore filename] folds over [filename] with [File.foldfile] and
   builds the table of ignore patterns.  Each line is compiled and added
   under the service name that [grabservice] extracts from it, or under the
   empty string when [grabservice] does not match.  A line that is rejected
   as a bad pattern is reported on standard error and not added. *)
let mkignore filename =
  let service_of line =
    try (Pcre.extract ~rex:grabservice ~full_match:false line).(0)
    with Not_found -> ""
  in
  let register table line =
    let service = service_of line in
    (try Hashtbl.add table service (Pcre.regexp line)
     with Pcre.Error (Pcre.BadPattern (msg, pos)) ->
       Printf.eprintf "Bad regexp: %s at position %d\n %s\n%!" msg pos line);
    table
  in
  File.foldfile filename register (Hashtbl.create 650)
(* [remaining_iter f (outbuffer, pos)] applies [f] once to every slot of
   the ring buffer, starting at index [!pos] and wrapping around to the
   slot just before it.  [!pos] is read once, before the first call. *)
let remaining_iter f (outbuffer, pos) =
  let size = Array.length outbuffer in
  let start = !pos in
  for offset = 0 to size - 1 do
    f outbuffer.((start + offset) mod size)
  done
;;
(* Program entry point.  Parses the command line, loads the ignore and
   summarise pattern files, filters every input line (from the named files
   in command-line order, or from standard input when none are given),
   then prints whatever is still held in the buffer followed by a one-line
   total of the input lines read. *)
let filenames = ref [] in
let bufsize = ref 25 in
let ignorefile = ref "/etc/piperlog/ignore" in
let summaryfile = ref "/etc/piperlog/summarise" in
let addfilename s = filenames := s::(!filenames) in
Arg.parse [
  ("--buffer-size", Arg.Int
     (fun i -> if i > 0 then bufsize := i
       else raise (Arg.Bad "Buffer size must be at least 1")),
   "Size of output buffer");
  ("--ignore-file", Arg.Set_string ignorefile,
   "File containing patterns to ignore");
  ("--summary-file", Arg.Set_string summaryfile,
   "File containing patterns to summarise");
  ("--", Arg.Rest addfilename, "File to be filtered")
] addfilename
  ("Usage: piperlog [--buffer-size <size>] [--ignore-file <file name>]\n" ^
   " [--summary-file <file name>] [[--] <file name> ...]");
(* File names were consed on as they were parsed; restore their order. *)
filenames := List.rev !filenames;
let ignorable = mkignore !ignorefile in
let summarisable = readregexps !summaryfile in
(* Every slot starts out with an empty time, which marks it as unused. *)
let outbuffer = (Array.make !bufsize ("", "", "", Normal, 0, ""), ref 0) in
let linecount = ref 0 in
let processline l =
  linecount := !linecount + 1;
  filter ignorable outbuffer summarisable l
in
let processchannel inchan = Pcre.foreach_line ~ic:inchan processline in
let processfile filename =
  let channel = open_in filename in
  (* Close the channel even when processing fails part-way through, then
     let the original exception carry on. *)
  (try processchannel channel
   with e -> close_in_noerr channel; raise e);
  close_in channel
in
(match !filenames with
 | [] -> processchannel stdin
 | _ -> List.iter processfile !filenames);
(* Flush the entries still sitting in the buffer, oldest slot first. *)
remaining_iter print_line outbuffer;
Printf.printf "\nSummary produced from %d lines of input by piperlog.\n%!" !linecount;;