Better parsing of mmftpdpasswd(5) file
[mmondor.git] / mmsoftware / apache-mmstat / apache-mmstat.c
CommitLineData
a83cfe4c
MM
1/* $Id: apache-mmstat.c,v 1.1 2003/07/02 08:27:51 mmondor Exp $ */
2
3/*
4 * Copyright (C) 2003, Matthew Mondor
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by Matthew Mondor.
18 * 4. The name of Matthew Mondor may not be used to endorse or promote
19 * products derived from this software without specific prior written
20 * permission.
21 * 5. Redistribution of source code may not be released under the terms of
22 * any GNU Public License derivate.
23 *
24 * THIS SOFTWARE IS PROVIDED BY MATTHEW MONDOR ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL MATTHEW MONDOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
30 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36
37
38#include <sys/types.h>
39#include <stdlib.h>
40#include <fcntl.h>
41#include <unistd.h>
42#include <stdio.h>
43#include <syslog.h>
44#include <signal.h>
45
46#include <mmtypes.h>
47#include <mmreadcfg.h> /* Only used for user/group related functions */
48#include <mmstring.h>
49#include <mmstat.h>
50
51
52
53#define LINESIZ 4096
54
55
56
57int main(int, char **);
58static void log_parse(mmstat_t *, char *);
59static void sighandler(int);
60
61
62
/* Positions of the command line arguments we expect in argv[].  ARG_MAX is
 * the exact argument count (argc) the program must be started with.
 */
enum argline {
    ARG_COMMAND = 0,    /* argv[0], used as prefix in syslog(3) messages */
    ARG_USER    = 1,    /* User to switch to when dropping privileges */
    ARG_GROUPS  = 2,    /* Group(s) to switch to when dropping privileges */
    ARG_CONF    = 3,    /* Value exported as MMSTATCONF for mmstat(3) */
    ARG_OPTIONS = 4,    /* Letters selecting which statistics to log */
    ARG_MAX     = 5
};
72
/* Columns expected in every log line, in order of appearance.  Lines are
 * split on '|' and must hold exactly COL_MAX columns to be accepted.
 */
enum logline {
    COL_VHOST      = 0,
    COL_REMOTEADDR = 1,
    COL_REFERER    = 2,
    COL_USERAGENT  = 3,
    COL_BYTES      = 4,
    COL_METHOD     = 5,
    COL_REQUEST    = 6,
    COL_STATUS     = 7,
    COL_MAX        = 8
};
85
86
87
/* Globals: one switch per statistics category.  Each one is enabled by
 * main() when its letter is present in the ARG_OPTIONS argument, and is
 * consulted by log_parse() for every processed line.
 */
bool    LOG_GLOBAL;         /* 'G': server-wide totals */
bool    LOG_VHOST;          /* 'V': per virtual host counters */
bool    LOG_REFERER;        /* 'R': per virtual host referers */
bool    LOG_USERAGENT;      /* 'U': per virtual host user agents */
bool    LOG_REMOTEADDR;     /* 'A': remote addresses of denied requests */
bool    LOG_REQUEST;        /* 'F': per virtual host method and request */
95
96
97
98int main(int argc, char **argv)
99{
100 char *linebuf;
101 mmstat_t mms;
102 struct sigaction act;
103
104 /* Setup a signal handler for SIGSEGV so that we prevent core dumping if
105 * we ever crash.
106 */
107 act.sa_handler = sighandler;
108 act.sa_flags = SA_NOCLDWAIT;
109 sigemptyset(&act.sa_mask);
110 sigaction(SIGSEGV, &act, NULL);
111
112 /* We're normally started from apache, and run as the superuser. We
113 * therefore do all we can to be safe until we drop privileges... Let's
114 * first redirect unnecessary filedescriptors to /dev/null. But, keep
115 * stdin, of course, which we'll read logs from later on.
116 */
117 {
118 int fd;
119
120 if ((fd = open("/dev/null", O_RDWR)) != -1) {
121 dup2(fd, 1);
122 dup2(fd, 2);
123 if (fd > 2)
124 close(fd);
125 }
126 }
127
128 /* Now perform sanity checking on launching mode and user supplied
129 * arguments.
130 */
131
132 /* Apache launches us as uid 0, we'll drop privileges soon however */
133 if (getuid() != 0) {
134 syslog(LOG_NOTICE, "%s: Not started as uid 0 from apache!? (uid %d)",
135 argv[ARG_COMMAND], getuid());
136 exit(-1);
137 }
138
139 /* We only accept a fixed number of arguments so that we restrict the
140 * need for getopt() and other libraries, or a more complex system when
141 * we're root.
142 */
143 if (argc != ARG_MAX) {
144 syslog(LOG_NOTICE, "%s: Started with wrong parameters",
145 argv[ARG_COMMAND]);
146 exit(-1);
147 }
148
149 /* Make sure that supplied user and group(s) are valid, and if so,
150 * drop privileges already.
151 */
152 {
153 uid_t uid;
154 gid_t *gids;
155 int ngids;
156
157 if ((uid = mmgetuid(argv[ARG_USER])) == -1) {
158 syslog(LOG_NOTICE, "%s: Unknown user '%s'", argv[ARG_COMMAND],
159 argv[ARG_USER]);
160 exit(-1);
161 }
162
163 if (!(gids = mmgetgidarray(&ngids, argv[ARG_GROUPS]))) {
164 syslog(LOG_NOTICE, "%s: One of following groups unknown: '%s'",
165 argv[ARG_COMMAND], argv[ARG_GROUPS]);
166 exit(-1);
167 }
168
169 /* NOTE: mmdropprivs() uses setegid(2), setgid(2), setgroups(2),
170 * seteuid(2), setgid(2), and then verifies that it really changed to
171 * the expected user permissions, in order to return TRUE on success.
172 */
173 if (!mmdropprivs(uid, gids, ngids)) {
174 syslog(LOG_NOTICE, "%s: Cannot change uid and gids to safe privs",
175 argv[ARG_COMMAND]);
176 exit(-1);
177 }
178 mmfreegidarray(gids);
179 }
180
181 /* Et voila, we're no longer the superuser. We can now proceed and
182 * perform our slave chores as mortals. First set the MMSTATCONF
183 * environment variable for mmstat(3) API to load the right configuration
184 * file. Then, call the logging function, just because we want the main
185 * loop out of main().
186 */
187 /* Log nothing by default, enable parts which were requested only. */
188 LOG_GLOBAL = LOG_VHOST = LOG_REFERER = LOG_USERAGENT = LOG_REMOTEADDR =
189 LOG_REQUEST = FALSE;
190 if (mm_strchr(argv[ARG_OPTIONS], 'G'))
191 LOG_GLOBAL = TRUE;
192 if (mm_strchr(argv[ARG_OPTIONS], 'V'))
193 LOG_VHOST = TRUE;
194 if (mm_strchr(argv[ARG_OPTIONS], 'R'))
195 LOG_REFERER = TRUE;
196 if (mm_strchr(argv[ARG_OPTIONS], 'U'))
197 LOG_USERAGENT = TRUE;
198 if (mm_strchr(argv[ARG_OPTIONS], 'A'))
199 LOG_REMOTEADDR = TRUE;
200 if (mm_strchr(argv[ARG_OPTIONS], 'F'))
201 LOG_REQUEST = TRUE;
202
203 if (setenv("MMSTATCONF", argv[ARG_CONF], TRUE) != 0) {
204 syslog(LOG_NOTICE, "%s: Cannot setenv(3)", argv[ARG_COMMAND]);
205 exit(-1);
206 }
207
208 if (!mmstat_init(&mms, TRUE, TRUE)) {
209 syslog(LOG_NOTICE, "%s: Cannot initialize mmstat(3)",
210 argv[ARG_COMMAND]);
211 exit(-1);
212 }
213
214 /* We preferably don't want the line buffer to be on the stack */
215 if ((linebuf = malloc(LINESIZ)) == NULL) {
216 syslog(LOG_NOTICE, "%s: Cannot allocate line buffer",
217 argv[ARG_COMMAND]);
218 exit(-1);
219 }
220
221 log_parse(&mms, linebuf);
222
223 /* NOTREACHED */
224 exit(0);
225}
226
227
/*
 * Signal handler, only installed for SIGSEGV by main().  Terminates the
 * process immediately with status 0 rather than letting the default action
 * produce a core dump.
 *
 * _exit(2) is used instead of exit(3) because exit(3) is not
 * async-signal-safe: it flushes stdio buffers and runs atexit(3) handlers,
 * which must not happen from within a signal handler, especially after a
 * crash which may have left the process state corrupted.
 *
 * sig: the delivered signal number, unused.
 */
/* ARGSUSED */
static void sighandler(int sig)
{
    (void)sig;

    _exit(0);
}
234
235
236/* When we get called, privileges have been revoked and mmstat(3) has been
237 * successfully initialized.
238 */
239static void log_parse(mmstat_t *mms, char *line)
240{
241 char *cols[COL_MAX + 1];
242
243 /* We'll exit if the pipe is closed by apache */
244 while (fgets(line, LINESIZ - 1, stdin) == line) {
245 size_t len;
246 int status = 1;
247 char *ptr;
248
249 /* Strip ending "\n". If there are none, we ignore the line as it
250 * consists of an abnormally long request which exceeds LINESIZ.
251 * It's next continueing line will then obviously not match the
252 * expected columns and will as a result also be ignored. It's
253 * unfortunate that fgets(3) cannot report that the line was not
254 * terminated in a faster way, without us having to go strip the
255 * line termination, but oh well, I don't want to use mmfd(3) for
256 * this. I would if I needed additional rate/bandwidth limits however.
257 */
258 len = mm_strlen(line);
259 if (len > 0 && line[len - 1] == '\n')
260 line[len - 1] = '\0';
261 else continue;
262
263 /* Strip dangerous characters from line */
264 for (ptr = line; *ptr != '\0'; ptr++) {
265 if (*ptr < 32) {
266 status = 0;
267 break;
268 }
269 switch (*ptr) {
270 case ' ': /* No spaces in key names */
271 *ptr = '_';
272 break;
273 case '*': /* Considered as wildcards by mmstat(3) */
274 case '?':
275 case '%': /* Why not, we use stdarg(3) alot */
276 *ptr = '$';
277 break;
278 }
279 }
280 if (status == 0)
281 continue;
282
283 /* Now separate line in columns and verify if the number of columns
284 * is the expected one. If it's not, ignore it.
285 */
286 if (mm_strspl(cols, line, COL_MAX, '|') != COL_MAX)
287 continue;
288
289 /* Verify that status is valid, it consists of the last field. If
290 * a malformed request or a user-supplied entry containing '|' was
291 * present, this would simply ignore the line, the correct behavior.
292 */
293 if ((status = atoi(cols[COL_STATUS])) == 0)
294 continue;
295
296 /* Start an mmstat(3) transaction, which makes sure that everything
297 * be processed atomically. This also is the recovery unit.
298 * Using a transaction is not a requirement for atomicity in this
299 * case, but it's more efficient than performing each operation
300 * independantly, beleive it or not (only one I/O syscall required).
301 */
302 mmstat_transact(mms, TRUE);
303
304 if (LOG_GLOBAL)
305 mmstat(mms, STAT_UPDATE, 1, "apache|total|requests");
306
307 switch (status) {
308 case 200: /* Success */
309 {
310 long bytes;
311
312 bytes = atol(cols[COL_BYTES]);
313
314 if (LOG_GLOBAL)
315 mmstat(mms, STAT_UPDATE, bytes, "apache|total|bytes");
316 if (LOG_VHOST) {
317 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|requests",
318 cols[COL_VHOST]);
319 mmstat(mms, STAT_UPDATE, bytes, "apache|vhost|%s|bytes",
320 cols[COL_VHOST]);
321 if (LOG_REQUEST)
322 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|%s|%s",
323 cols[COL_VHOST], cols[COL_METHOD],
324 cols[COL_REQUEST]);
325 if (LOG_REFERER)
326 mmstat(mms, STAT_UPDATE, 1,
327 "apache|vhost|%s|referer|%s", cols[COL_VHOST],
328 cols[COL_REFERER]);
329 if (LOG_USERAGENT)
330 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|agent|%s",
331 cols[COL_VHOST], cols[COL_USERAGENT]);
332 }
333 }
334 break;
335 case 404: /* Not found */
336 if (LOG_GLOBAL)
337 mmstat(mms, STAT_UPDATE, 1, "apache|total|errors");
338 if (LOG_VHOST) {
339 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|errors",
340 cols[COL_VHOST]);
341 if (LOG_REFERER)
342 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|referer|%s",
343 cols[COL_VHOST], cols[COL_REFERER]);
344 if (LOG_USERAGENT)
345 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|agent|%s",
346 cols[COL_VHOST], cols[COL_USERAGENT]);
347 }
348 break;
349 case 400: /* Bad request */
350 if (LOG_GLOBAL)
351 mmstat(mms, STAT_UPDATE, 1, "apache|total|errors");
352 if (LOG_VHOST) {
353 if (LOG_REFERER)
354 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|referer|%s",
355 cols[COL_VHOST], cols[COL_REFERER]);
356 if (LOG_USERAGENT)
357 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|agent|%s",
358 cols[COL_VHOST], cols[COL_USERAGENT]);
359 }
360 break;
361 case 403: /* Denied */
362 if (LOG_GLOBAL) {
363 mmstat(mms, STAT_UPDATE, 1, "apache|total|denied");
364 if (LOG_REMOTEADDR)
365 mmstat(mms, STAT_UPDATE, 1, "apache|denied|%s",
366 cols[COL_REMOTEADDR]);
367 }
368 if (LOG_VHOST) {
369 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|denied",
370 cols[COL_VHOST]);
371 if (LOG_REMOTEADDR)
372 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|denied|%s",
373 cols[COL_VHOST], cols[COL_REMOTEADDR]);
374 if (LOG_REFERER)
375 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|referer|%s",
376 cols[COL_VHOST], cols[COL_REFERER]);
377 if (LOG_USERAGENT)
378 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|agent|%s",
379 cols[COL_VHOST], cols[COL_USERAGENT]);
380 }
381 break;
382 }
383
384 /* Close transaction, that is, commit any changes. Once this is
385 * called, the statistics are relayed to the mmstat(8) daemon.
386 */
387 mmstat_transact(mms, FALSE);
388 }
389}