mmlib/mmat: replace some variables by literal constants
[mmondor.git] / mmsoftware / apache-mmstat / apache-mmstat.c
CommitLineData
b232bd02 1/* $Id: apache-mmstat.c,v 1.5 2007/12/05 23:47:55 mmondor Exp $ */
a83cfe4c
MM
2
3/*
4 * Copyright (C) 2003, Matthew Mondor
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by Matthew Mondor.
18 * 4. The name of Matthew Mondor may not be used to endorse or promote
19 * products derived from this software without specific prior written
20 * permission.
21 * 5. Redistribution of source code may not be released under the terms of
22 * any GNU Public License derivate.
23 *
24 * THIS SOFTWARE IS PROVIDED BY MATTHEW MONDOR ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL MATTHEW MONDOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
30 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36
37
38#include <sys/types.h>
b54ff31e 39#include <errno.h>
a83cfe4c
MM
40#include <stdlib.h>
41#include <fcntl.h>
42#include <unistd.h>
b232bd02 43#include <stdbool.h>
a83cfe4c 44#include <stdio.h>
b232bd02 45#include <string.h>
a83cfe4c
MM
46#include <syslog.h>
47#include <signal.h>
48
49#include <mmtypes.h>
50#include <mmreadcfg.h> /* Only used for user/group related functions */
51#include <mmstring.h>
52#include <mmstat.h>
53
54
55
/* Size in bytes of the heap buffer used to read one log line from stdin. */
#define LINESIZ (4 * 1024)
57
58
59
b54ff31e
MM
60int main(int, char **);
61
62static void log_parse(mmstat_t *, char *);
63static void sighandler(int);
a83cfe4c
MM
64
65
66
/* Positions of the fixed command line arguments within argv[]. */
enum argline {
    ARG_COMMAND = 0,    /* Program name, used as prefix in syslog messages */
    ARG_USER    = 1,    /* User to switch to when dropping privileges */
    ARG_GROUPS  = 2,    /* Group(s) to switch to when dropping privileges */
    ARG_CONF    = 3,    /* Value exported as the MMSTATCONF variable */
    ARG_OPTIONS = 4,    /* Letters selecting which statistics to log */
    ARG_MAX     = 5     /* Exact argc value which main() requires */
};
76
/* Indexes of the '|' separated columns expected on every log line. */
enum logline {
    COL_VHOST      = 0,
    COL_REMOTEADDR = 1,
    COL_REFERER    = 2,
    COL_USERAGENT  = 3,
    COL_BYTES      = 4,     /* Parsed as a number for the bytes counters */
    COL_METHOD     = 5,
    COL_REQUEST    = 6,
    COL_STATUS     = 7,     /* Parsed as a number, selects what gets logged */
    COL_MAX        = 8      /* Number of columns a valid line must have */
};
89
90
91
/* Logging switches.  Each one is enabled by main() when its letter is
 * present in the options argument, and consulted by log_parse().
 */
static bool LOG_GLOBAL = false;         /* 'G': "apache|total|..." counters */
static bool LOG_VHOST = false;          /* 'V': per virtual host counters */
static bool LOG_REFERER = false;        /* 'R': per vhost referer counters */
static bool LOG_USERAGENT = false;      /* 'U': per vhost user agent counters */
static bool LOG_REMOTEADDR = false;     /* 'A': remote address of denied hits */
static bool LOG_REQUEST = false;        /* 'F': per vhost method and request */
a83cfe4c
MM
99
100
101
b54ff31e
MM
102int
103main(int argc, char **argv)
a83cfe4c 104{
b54ff31e
MM
105 char *linebuf;
106 mmstat_t mms;
107 struct sigaction act;
a83cfe4c
MM
108
109 /* Setup a signal handler for SIGSEGV so that we prevent core dumping if
110 * we ever crash.
111 */
112 act.sa_handler = sighandler;
113 act.sa_flags = SA_NOCLDWAIT;
114 sigemptyset(&act.sa_mask);
115 sigaction(SIGSEGV, &act, NULL);
116
117 /* We're normally started from apache, and run as the superuser. We
118 * therefore do all we can to be safe until we drop privileges... Let's
119 * first redirect unnecessary filedescriptors to /dev/null. But, keep
120 * stdin, of course, which we'll read logs from later on.
121 */
122 {
b54ff31e 123 int fd;
a83cfe4c
MM
124
125 if ((fd = open("/dev/null", O_RDWR)) != -1) {
126 dup2(fd, 1);
127 dup2(fd, 2);
128 if (fd > 2)
129 close(fd);
130 }
131 }
132
133 /* Now perform sanity checking on launching mode and user supplied
134 * arguments.
135 */
136
137 /* Apache launches us as uid 0, we'll drop privileges soon however */
138 if (getuid() != 0) {
139 syslog(LOG_NOTICE, "%s: Not started as uid 0 from apache!? (uid %d)",
140 argv[ARG_COMMAND], getuid());
568e2272 141 exit(EXIT_FAILURE);
a83cfe4c
MM
142 }
143
144 /* We only accept a fixed number of arguments so that we restrict the
145 * need for getopt() and other libraries, or a more complex system when
146 * we're root.
147 */
148 if (argc != ARG_MAX) {
149 syslog(LOG_NOTICE, "%s: Started with wrong parameters",
150 argv[ARG_COMMAND]);
568e2272 151 exit(EXIT_FAILURE);
a83cfe4c
MM
152 }
153
154 /* Make sure that supplied user and group(s) are valid, and if so,
155 * drop privileges already.
156 */
157 {
b54ff31e
MM
158 uid_t uid;
159 gid_t *gids;
160 int ngids;
a83cfe4c
MM
161
162 if ((uid = mmgetuid(argv[ARG_USER])) == -1) {
163 syslog(LOG_NOTICE, "%s: Unknown user '%s'", argv[ARG_COMMAND],
164 argv[ARG_USER]);
568e2272 165 exit(EXIT_FAILURE);
a83cfe4c
MM
166 }
167
168 if (!(gids = mmgetgidarray(&ngids, argv[ARG_GROUPS]))) {
169 syslog(LOG_NOTICE, "%s: One of following groups unknown: '%s'",
170 argv[ARG_COMMAND], argv[ARG_GROUPS]);
568e2272 171 exit(EXIT_FAILURE);
a83cfe4c
MM
172 }
173
174 /* NOTE: mmdropprivs() uses setegid(2), setgid(2), setgroups(2),
175 * seteuid(2), setgid(2), and then verifies that it really changed to
176 * the expected user permissions, in order to return TRUE on success.
177 */
178 if (!mmdropprivs(uid, gids, ngids)) {
179 syslog(LOG_NOTICE, "%s: Cannot change uid and gids to safe privs",
180 argv[ARG_COMMAND]);
568e2272 181 exit(EXIT_FAILURE);
a83cfe4c
MM
182 }
183 mmfreegidarray(gids);
184 }
185
186 /* Et voila, we're no longer the superuser. We can now proceed and
187 * perform our slave chores as mortals. First set the MMSTATCONF
188 * environment variable for mmstat(3) API to load the right configuration
189 * file. Then, call the logging function, just because we want the main
190 * loop out of main().
191 */
192 /* Log nothing by default, enable parts which were requested only. */
193 LOG_GLOBAL = LOG_VHOST = LOG_REFERER = LOG_USERAGENT = LOG_REMOTEADDR =
194 LOG_REQUEST = FALSE;
0983cadf 195 if (strchr(argv[ARG_OPTIONS], 'G'))
a83cfe4c 196 LOG_GLOBAL = TRUE;
0983cadf 197 if (strchr(argv[ARG_OPTIONS], 'V'))
a83cfe4c 198 LOG_VHOST = TRUE;
0983cadf 199 if (strchr(argv[ARG_OPTIONS], 'R'))
a83cfe4c 200 LOG_REFERER = TRUE;
0983cadf 201 if (strchr(argv[ARG_OPTIONS], 'U'))
a83cfe4c 202 LOG_USERAGENT = TRUE;
0983cadf 203 if (strchr(argv[ARG_OPTIONS], 'A'))
a83cfe4c 204 LOG_REMOTEADDR = TRUE;
0983cadf 205 if (strchr(argv[ARG_OPTIONS], 'F'))
a83cfe4c
MM
206 LOG_REQUEST = TRUE;
207
208 if (setenv("MMSTATCONF", argv[ARG_CONF], TRUE) != 0) {
209 syslog(LOG_NOTICE, "%s: Cannot setenv(3)", argv[ARG_COMMAND]);
568e2272 210 exit(EXIT_FAILURE);
a83cfe4c
MM
211 }
212
213 if (!mmstat_init(&mms, TRUE, TRUE)) {
214 syslog(LOG_NOTICE, "%s: Cannot initialize mmstat(3)",
215 argv[ARG_COMMAND]);
568e2272 216 exit(EXIT_FAILURE);
a83cfe4c
MM
217 }
218
219 /* We preferably don't want the line buffer to be on the stack */
220 if ((linebuf = malloc(LINESIZ)) == NULL) {
221 syslog(LOG_NOTICE, "%s: Cannot allocate line buffer",
222 argv[ARG_COMMAND]);
568e2272 223 exit(EXIT_FAILURE);
a83cfe4c
MM
224 }
225
226 log_parse(&mms, linebuf);
227
228 /* NOTREACHED */
568e2272 229 exit(EXIT_SUCCESS);
a83cfe4c
MM
230}
231
232
/* Signal handler which main() installs for SIGSEGV only.  It ends the
 * process with a successful exit status so that a crash does not produce a
 * core dump.
 *
 * _exit(2) is used rather than exit(3): exit(3) is not async-signal-safe,
 * as it runs atexit(3) handlers and flushes stdio buffers, which must not
 * be done from a handler invoked after memory was possibly corrupted.
 */
/* ARGSUSED */
static void
sighandler(int sig)
{
    (void)sig;

    _exit(EXIT_SUCCESS);
}
240
241
242/* When we get called, privileges have been revoked and mmstat(3) has been
243 * successfully initialized.
244 */
b54ff31e
MM
245static void
246log_parse(mmstat_t *mms, char *line)
a83cfe4c 247{
b54ff31e 248 char *cols[COL_MAX + 1];
a83cfe4c
MM
249
250 /* We'll exit if the pipe is closed by apache */
251 while (fgets(line, LINESIZ - 1, stdin) == line) {
b54ff31e
MM
252 size_t len;
253 int status;
254 char *ptr;
255
256 status = 1;
a83cfe4c
MM
257
258 /* Strip ending "\n". If there are none, we ignore the line as it
259 * consists of an abnormally long request which exceeds LINESIZ.
260 * It's next continueing line will then obviously not match the
261 * expected columns and will as a result also be ignored. It's
262 * unfortunate that fgets(3) cannot report that the line was not
263 * terminated in a faster way, without us having to go strip the
264 * line termination, but oh well, I don't want to use mmfd(3) for
265 * this. I would if I needed additional rate/bandwidth limits however.
266 */
0983cadf 267 len = strlen(line);
a83cfe4c
MM
268 if (len > 0 && line[len - 1] == '\n')
269 line[len - 1] = '\0';
b54ff31e
MM
270 else
271 continue;
a83cfe4c
MM
272
273 /* Strip dangerous characters from line */
274 for (ptr = line; *ptr != '\0'; ptr++) {
275 if (*ptr < 32) {
276 status = 0;
277 break;
278 }
279 switch (*ptr) {
280 case ' ': /* No spaces in key names */
281 *ptr = '_';
282 break;
283 case '*': /* Considered as wildcards by mmstat(3) */
568e2272 284 /* FALLTHROUGH */
a83cfe4c 285 case '?':
568e2272 286 /* FALLTHROUGH */
a83cfe4c
MM
287 case '%': /* Why not, we use stdarg(3) alot */
288 *ptr = '$';
289 break;
290 }
291 }
292 if (status == 0)
293 continue;
294
295 /* Now separate line in columns and verify if the number of columns
296 * is the expected one. If it's not, ignore it.
297 */
298 if (mm_strspl(cols, line, COL_MAX, '|') != COL_MAX)
299 continue;
300
301 /* Verify that status is valid, it consists of the last field. If
302 * a malformed request or a user-supplied entry containing '|' was
303 * present, this would simply ignore the line, the correct behavior.
304 */
305 if ((status = atoi(cols[COL_STATUS])) == 0)
306 continue;
307
308 /* Start an mmstat(3) transaction, which makes sure that everything
309 * be processed atomically. This also is the recovery unit.
310 * Using a transaction is not a requirement for atomicity in this
311 * case, but it's more efficient than performing each operation
312 * independantly, beleive it or not (only one I/O syscall required).
313 */
314 mmstat_transact(mms, TRUE);
315
316 if (LOG_GLOBAL)
317 mmstat(mms, STAT_UPDATE, 1, "apache|total|requests");
318
319 switch (status) {
320 case 200: /* Success */
321 {
b54ff31e 322 long bytes;
a83cfe4c
MM
323
324 bytes = atol(cols[COL_BYTES]);
325
326 if (LOG_GLOBAL)
327 mmstat(mms, STAT_UPDATE, bytes, "apache|total|bytes");
328 if (LOG_VHOST) {
329 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|requests",
330 cols[COL_VHOST]);
331 mmstat(mms, STAT_UPDATE, bytes, "apache|vhost|%s|bytes",
332 cols[COL_VHOST]);
333 if (LOG_REQUEST)
334 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|%s|%s",
335 cols[COL_VHOST], cols[COL_METHOD],
336 cols[COL_REQUEST]);
337 if (LOG_REFERER)
338 mmstat(mms, STAT_UPDATE, 1,
339 "apache|vhost|%s|referer|%s", cols[COL_VHOST],
340 cols[COL_REFERER]);
341 if (LOG_USERAGENT)
342 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|agent|%s",
343 cols[COL_VHOST], cols[COL_USERAGENT]);
344 }
345 }
346 break;
347 case 404: /* Not found */
348 if (LOG_GLOBAL)
349 mmstat(mms, STAT_UPDATE, 1, "apache|total|errors");
350 if (LOG_VHOST) {
351 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|errors",
352 cols[COL_VHOST]);
353 if (LOG_REFERER)
354 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|referer|%s",
355 cols[COL_VHOST], cols[COL_REFERER]);
356 if (LOG_USERAGENT)
357 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|agent|%s",
358 cols[COL_VHOST], cols[COL_USERAGENT]);
359 }
360 break;
361 case 400: /* Bad request */
362 if (LOG_GLOBAL)
363 mmstat(mms, STAT_UPDATE, 1, "apache|total|errors");
364 if (LOG_VHOST) {
365 if (LOG_REFERER)
366 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|referer|%s",
367 cols[COL_VHOST], cols[COL_REFERER]);
368 if (LOG_USERAGENT)
369 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|agent|%s",
370 cols[COL_VHOST], cols[COL_USERAGENT]);
371 }
372 break;
373 case 403: /* Denied */
374 if (LOG_GLOBAL) {
375 mmstat(mms, STAT_UPDATE, 1, "apache|total|denied");
376 if (LOG_REMOTEADDR)
377 mmstat(mms, STAT_UPDATE, 1, "apache|denied|%s",
378 cols[COL_REMOTEADDR]);
379 }
380 if (LOG_VHOST) {
381 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|denied",
382 cols[COL_VHOST]);
383 if (LOG_REMOTEADDR)
384 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|denied|%s",
385 cols[COL_VHOST], cols[COL_REMOTEADDR]);
386 if (LOG_REFERER)
387 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|referer|%s",
388 cols[COL_VHOST], cols[COL_REFERER]);
389 if (LOG_USERAGENT)
390 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|agent|%s",
391 cols[COL_VHOST], cols[COL_USERAGENT]);
392 }
393 break;
394 }
395
396 /* Close transaction, that is, commit any changes. Once this is
397 * called, the statistics are relayed to the mmstat(8) daemon.
398 */
b54ff31e
MM
399 if (!mmstat_transact(mms, FALSE))
400 syslog(LOG_NOTICE, "mmstat error - %s", strerror(errno));
a83cfe4c
MM
401 }
402}