*** empty log message ***
[mmondor.git] / mmsoftware / apache-mmstat / apache-mmstat.c
CommitLineData
568e2272 1/* $Id: apache-mmstat.c,v 1.2 2004/04/30 00:01:17 mmondor Exp $ */
a83cfe4c
MM
2
3/*
4 * Copyright (C) 2003, Matthew Mondor
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by Matthew Mondor.
18 * 4. The name of Matthew Mondor may not be used to endorse or promote
19 * products derived from this software without specific prior written
20 * permission.
21 * 5. Redistribution of source code may not be released under the terms of
22 * any GNU Public License derivate.
23 *
24 * THIS SOFTWARE IS PROVIDED BY MATTHEW MONDOR ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL MATTHEW MONDOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
30 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36
37
38#include <sys/types.h>
39#include <stdlib.h>
40#include <fcntl.h>
41#include <unistd.h>
42#include <stdio.h>
43#include <syslog.h>
44#include <signal.h>
45
46#include <mmtypes.h>
47#include <mmreadcfg.h> /* Only used for user/group related functions */
48#include <mmstring.h>
49#include <mmstat.h>
50
51
52
/* Size, in bytes, of the heap buffer main() allocates to hold one log line. */
#define LINESIZ (4 * 1024)
54
55
56
57int main(int, char **);
58static void log_parse(mmstat_t *, char *);
59static void sighandler(int);
60
61
62
/* Positions of the command line arguments we require.  main() refuses to
 * run unless argc is exactly ARG_MAX.
 */
enum argline {
    ARG_COMMAND = 0,    /* argv[0], used as prefix of our syslog messages */
    ARG_USER    = 1,    /* User to become when dropping privileges */
    ARG_GROUPS  = 2,    /* Group(s) to become when dropping privileges */
    ARG_CONF    = 3,    /* Exported as MMSTATCONF before mmstat_init() */
    ARG_OPTIONS = 4,    /* Letters selecting which statistics to record */
    ARG_MAX     = 5     /* Number of entries above, the expected argc */
};
72
/* Columns we expect, in this order, in every '|' separated log line read
 * from stdin.  Lines which do not split into exactly COL_MAX columns are
 * ignored by log_parse().
 */
enum logline {
    COL_VHOST      = 0,
    COL_REMOTEADDR = 1,
    COL_REFERER    = 2,
    COL_USERAGENT  = 3,
    COL_BYTES      = 4, /* Added to the byte counters for status 200 */
    COL_METHOD     = 5,
    COL_REQUEST    = 6,
    COL_STATUS     = 7, /* Parsed as a number, last column of the line */
    COL_MAX        = 8  /* Number of columns */
};
85
86
87
/* Globals: which statistics log_parse() records.  main() enables each flag
 * when the corresponding letter is present in the options argument.
 */
bool LOG_GLOBAL,        /* G: "apache|total|..." counters */
     LOG_VHOST,         /* V: "apache|vhost|<vhost>|..." counters */
     LOG_REFERER,       /* R: per vhost referer counters */
     LOG_USERAGENT,     /* U: per vhost user agent counters */
     LOG_REMOTEADDR,    /* A: remote address of denied requests */
     LOG_REQUEST;       /* F: per vhost method and request counters */
95
96
97
98int main(int argc, char **argv)
99{
100 char *linebuf;
101 mmstat_t mms;
102 struct sigaction act;
103
104 /* Setup a signal handler for SIGSEGV so that we prevent core dumping if
105 * we ever crash.
106 */
107 act.sa_handler = sighandler;
108 act.sa_flags = SA_NOCLDWAIT;
109 sigemptyset(&act.sa_mask);
110 sigaction(SIGSEGV, &act, NULL);
111
112 /* We're normally started from apache, and run as the superuser. We
113 * therefore do all we can to be safe until we drop privileges... Let's
114 * first redirect unnecessary filedescriptors to /dev/null. But, keep
115 * stdin, of course, which we'll read logs from later on.
116 */
117 {
118 int fd;
119
120 if ((fd = open("/dev/null", O_RDWR)) != -1) {
121 dup2(fd, 1);
122 dup2(fd, 2);
123 if (fd > 2)
124 close(fd);
125 }
126 }
127
128 /* Now perform sanity checking on launching mode and user supplied
129 * arguments.
130 */
131
132 /* Apache launches us as uid 0, we'll drop privileges soon however */
133 if (getuid() != 0) {
134 syslog(LOG_NOTICE, "%s: Not started as uid 0 from apache!? (uid %d)",
135 argv[ARG_COMMAND], getuid());
568e2272 136 exit(EXIT_FAILURE);
a83cfe4c
MM
137 }
138
139 /* We only accept a fixed number of arguments so that we restrict the
140 * need for getopt() and other libraries, or a more complex system when
141 * we're root.
142 */
143 if (argc != ARG_MAX) {
144 syslog(LOG_NOTICE, "%s: Started with wrong parameters",
145 argv[ARG_COMMAND]);
568e2272 146 exit(EXIT_FAILURE);
a83cfe4c
MM
147 }
148
149 /* Make sure that supplied user and group(s) are valid, and if so,
150 * drop privileges already.
151 */
152 {
153 uid_t uid;
154 gid_t *gids;
155 int ngids;
156
157 if ((uid = mmgetuid(argv[ARG_USER])) == -1) {
158 syslog(LOG_NOTICE, "%s: Unknown user '%s'", argv[ARG_COMMAND],
159 argv[ARG_USER]);
568e2272 160 exit(EXIT_FAILURE);
a83cfe4c
MM
161 }
162
163 if (!(gids = mmgetgidarray(&ngids, argv[ARG_GROUPS]))) {
164 syslog(LOG_NOTICE, "%s: One of following groups unknown: '%s'",
165 argv[ARG_COMMAND], argv[ARG_GROUPS]);
568e2272 166 exit(EXIT_FAILURE);
a83cfe4c
MM
167 }
168
169 /* NOTE: mmdropprivs() uses setegid(2), setgid(2), setgroups(2),
170 * seteuid(2), setgid(2), and then verifies that it really changed to
171 * the expected user permissions, in order to return TRUE on success.
172 */
173 if (!mmdropprivs(uid, gids, ngids)) {
174 syslog(LOG_NOTICE, "%s: Cannot change uid and gids to safe privs",
175 argv[ARG_COMMAND]);
568e2272 176 exit(EXIT_FAILURE);
a83cfe4c
MM
177 }
178 mmfreegidarray(gids);
179 }
180
181 /* Et voila, we're no longer the superuser. We can now proceed and
182 * perform our slave chores as mortals. First set the MMSTATCONF
183 * environment variable for mmstat(3) API to load the right configuration
184 * file. Then, call the logging function, just because we want the main
185 * loop out of main().
186 */
187 /* Log nothing by default, enable parts which were requested only. */
188 LOG_GLOBAL = LOG_VHOST = LOG_REFERER = LOG_USERAGENT = LOG_REMOTEADDR =
189 LOG_REQUEST = FALSE;
190 if (mm_strchr(argv[ARG_OPTIONS], 'G'))
191 LOG_GLOBAL = TRUE;
192 if (mm_strchr(argv[ARG_OPTIONS], 'V'))
193 LOG_VHOST = TRUE;
194 if (mm_strchr(argv[ARG_OPTIONS], 'R'))
195 LOG_REFERER = TRUE;
196 if (mm_strchr(argv[ARG_OPTIONS], 'U'))
197 LOG_USERAGENT = TRUE;
198 if (mm_strchr(argv[ARG_OPTIONS], 'A'))
199 LOG_REMOTEADDR = TRUE;
200 if (mm_strchr(argv[ARG_OPTIONS], 'F'))
201 LOG_REQUEST = TRUE;
202
203 if (setenv("MMSTATCONF", argv[ARG_CONF], TRUE) != 0) {
204 syslog(LOG_NOTICE, "%s: Cannot setenv(3)", argv[ARG_COMMAND]);
568e2272 205 exit(EXIT_FAILURE);
a83cfe4c
MM
206 }
207
208 if (!mmstat_init(&mms, TRUE, TRUE)) {
209 syslog(LOG_NOTICE, "%s: Cannot initialize mmstat(3)",
210 argv[ARG_COMMAND]);
568e2272 211 exit(EXIT_FAILURE);
a83cfe4c
MM
212 }
213
214 /* We preferably don't want the line buffer to be on the stack */
215 if ((linebuf = malloc(LINESIZ)) == NULL) {
216 syslog(LOG_NOTICE, "%s: Cannot allocate line buffer",
217 argv[ARG_COMMAND]);
568e2272 218 exit(EXIT_FAILURE);
a83cfe4c
MM
219 }
220
221 log_parse(&mms, linebuf);
222
223 /* NOTREACHED */
568e2272 224 exit(EXIT_SUCCESS);
a83cfe4c
MM
225}
226
227
/* ARGSUSED */
/*
 * Signal handler which main() installs for SIGSEGV only.  Terminates the
 * process with a successful status rather than letting the default action
 * of the signal take place.
 *
 * sig: number of the signal which was caught, unused.
 */
static void sighandler(int sig)
{
    (void)sig;

    /* exit(3) is not async-signal-safe: it runs atexit(3) handlers and
     * flushes stdio buffers, either of which may touch the very state that
     * caused us to crash.  _exit(2) terminates immediately and may safely
     * be called from a signal handler.
     */
    _exit(EXIT_SUCCESS);
}
234
235
236/* When we get called, privileges have been revoked and mmstat(3) has been
237 * successfully initialized.
238 */
239static void log_parse(mmstat_t *mms, char *line)
240{
241 char *cols[COL_MAX + 1];
242
243 /* We'll exit if the pipe is closed by apache */
244 while (fgets(line, LINESIZ - 1, stdin) == line) {
245 size_t len;
246 int status = 1;
247 char *ptr;
248
249 /* Strip ending "\n". If there are none, we ignore the line as it
250 * consists of an abnormally long request which exceeds LINESIZ.
251 * It's next continueing line will then obviously not match the
252 * expected columns and will as a result also be ignored. It's
253 * unfortunate that fgets(3) cannot report that the line was not
254 * terminated in a faster way, without us having to go strip the
255 * line termination, but oh well, I don't want to use mmfd(3) for
256 * this. I would if I needed additional rate/bandwidth limits however.
257 */
258 len = mm_strlen(line);
259 if (len > 0 && line[len - 1] == '\n')
260 line[len - 1] = '\0';
261 else continue;
262
263 /* Strip dangerous characters from line */
264 for (ptr = line; *ptr != '\0'; ptr++) {
265 if (*ptr < 32) {
266 status = 0;
267 break;
268 }
269 switch (*ptr) {
270 case ' ': /* No spaces in key names */
271 *ptr = '_';
272 break;
273 case '*': /* Considered as wildcards by mmstat(3) */
568e2272 274 /* FALLTHROUGH */
a83cfe4c 275 case '?':
568e2272 276 /* FALLTHROUGH */
a83cfe4c
MM
277 case '%': /* Why not, we use stdarg(3) alot */
278 *ptr = '$';
279 break;
280 }
281 }
282 if (status == 0)
283 continue;
284
285 /* Now separate line in columns and verify if the number of columns
286 * is the expected one. If it's not, ignore it.
287 */
288 if (mm_strspl(cols, line, COL_MAX, '|') != COL_MAX)
289 continue;
290
291 /* Verify that status is valid, it consists of the last field. If
292 * a malformed request or a user-supplied entry containing '|' was
293 * present, this would simply ignore the line, the correct behavior.
294 */
295 if ((status = atoi(cols[COL_STATUS])) == 0)
296 continue;
297
298 /* Start an mmstat(3) transaction, which makes sure that everything
299 * be processed atomically. This also is the recovery unit.
300 * Using a transaction is not a requirement for atomicity in this
301 * case, but it's more efficient than performing each operation
302 * independantly, beleive it or not (only one I/O syscall required).
303 */
304 mmstat_transact(mms, TRUE);
305
306 if (LOG_GLOBAL)
307 mmstat(mms, STAT_UPDATE, 1, "apache|total|requests");
308
309 switch (status) {
310 case 200: /* Success */
311 {
312 long bytes;
313
314 bytes = atol(cols[COL_BYTES]);
315
316 if (LOG_GLOBAL)
317 mmstat(mms, STAT_UPDATE, bytes, "apache|total|bytes");
318 if (LOG_VHOST) {
319 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|requests",
320 cols[COL_VHOST]);
321 mmstat(mms, STAT_UPDATE, bytes, "apache|vhost|%s|bytes",
322 cols[COL_VHOST]);
323 if (LOG_REQUEST)
324 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|%s|%s",
325 cols[COL_VHOST], cols[COL_METHOD],
326 cols[COL_REQUEST]);
327 if (LOG_REFERER)
328 mmstat(mms, STAT_UPDATE, 1,
329 "apache|vhost|%s|referer|%s", cols[COL_VHOST],
330 cols[COL_REFERER]);
331 if (LOG_USERAGENT)
332 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|agent|%s",
333 cols[COL_VHOST], cols[COL_USERAGENT]);
334 }
335 }
336 break;
337 case 404: /* Not found */
338 if (LOG_GLOBAL)
339 mmstat(mms, STAT_UPDATE, 1, "apache|total|errors");
340 if (LOG_VHOST) {
341 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|errors",
342 cols[COL_VHOST]);
343 if (LOG_REFERER)
344 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|referer|%s",
345 cols[COL_VHOST], cols[COL_REFERER]);
346 if (LOG_USERAGENT)
347 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|agent|%s",
348 cols[COL_VHOST], cols[COL_USERAGENT]);
349 }
350 break;
351 case 400: /* Bad request */
352 if (LOG_GLOBAL)
353 mmstat(mms, STAT_UPDATE, 1, "apache|total|errors");
354 if (LOG_VHOST) {
355 if (LOG_REFERER)
356 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|referer|%s",
357 cols[COL_VHOST], cols[COL_REFERER]);
358 if (LOG_USERAGENT)
359 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|agent|%s",
360 cols[COL_VHOST], cols[COL_USERAGENT]);
361 }
362 break;
363 case 403: /* Denied */
364 if (LOG_GLOBAL) {
365 mmstat(mms, STAT_UPDATE, 1, "apache|total|denied");
366 if (LOG_REMOTEADDR)
367 mmstat(mms, STAT_UPDATE, 1, "apache|denied|%s",
368 cols[COL_REMOTEADDR]);
369 }
370 if (LOG_VHOST) {
371 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|denied",
372 cols[COL_VHOST]);
373 if (LOG_REMOTEADDR)
374 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|denied|%s",
375 cols[COL_VHOST], cols[COL_REMOTEADDR]);
376 if (LOG_REFERER)
377 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|referer|%s",
378 cols[COL_VHOST], cols[COL_REFERER]);
379 if (LOG_USERAGENT)
380 mmstat(mms, STAT_UPDATE, 1, "apache|vhost|%s|agent|%s",
381 cols[COL_VHOST], cols[COL_USERAGENT]);
382 }
383 break;
384 }
385
386 /* Close transaction, that is, commit any changes. Once this is
387 * called, the statistics are relayed to the mmstat(8) daemon.
388 */
389 mmstat_transact(mms, FALSE);
390 }
391}