Commit 5e1a794101686533cddd9b1672373d397e62d2c5
- Diff rendering mode:
- inline
- side by side
logfiles.cpp
(282 / 282)
|   | |||
| 27 | 27 | #define NOTREACHED 0 | |
| 28 | 28 | ||
/*
 * Pattern matching one Amazon S3 access log record.
 *
 * Capture groups (see outputLineS3 for how they are rearranged):
 *   1 bucket, 2 timestamp, 3 client IP, 4 quoted request, 5 status,
 *   6 size, 7 quoted referer, 8 quoted user agent.
 *
 * The letter ranges are spelled A-Za-z on purpose: a single A-z range also
 * covers the punctuation between 'Z' and 'a' ('[', '\', ']', '^', '_', '`').
 */
#define AMAZON_S3_REGEX "^[0-9a-f]+ ([-A-Za-z0-9_\\.]+) \\[(.*)\\] ([0-9\\.]+) " \
    "[0-9a-f]+ [0-9A-F]+ \\S+ \\S+ (\"[^\"]*\") (\\d+) [-A-Za-z0-9]+ ([-0-9]+) " \
    "[-0-9]+ \\d+ [-0-9]+ (\"[^\"]*\") (\"[^\"]*\")"
| 32 | 32 | ||
| 33 | 33 | boost::regex amazon_s3_regex(AMAZON_S3_REGEX, boost::regex::perl); | |
| 34 | 34 | ||
| 35 | 35 | static bool myGzgets(struct logfile *lf) | |
| 36 | 36 | { | |
| 37 | char *rv=lf->line; | ||
| 38 | int s=LINE_BUFFER; | ||
| 39 | int bytesRead=0; | ||
| 37 | char *rv=lf->line; | ||
| 38 | int s=LINE_BUFFER; | ||
| 39 | int bytesRead=0; | ||
| 40 | 40 | ||
| 41 | lf->lineLength=0; | ||
| 41 | lf->lineLength=0; | ||
| 42 | 42 | ||
| 43 | for(;;) { | ||
| 44 | if(lf->gzBufCur > lf->gzBufEnd || lf->gzBufEnd == NULL) { | ||
| 45 | /* Fetch some more stuff */ | ||
| 46 | bytesRead=gzread(lf->input, lf->gzBuf, GZBUFFER); | ||
| 47 | lf->gzBufEnd=bytesRead + lf->gzBuf - 1; | ||
| 48 | /* Make sure we got something */ | ||
| 49 | if(bytesRead == 0) { | ||
| 50 | return(false); | ||
| 51 | } | ||
| 52 | lf->gzBufCur=lf->gzBuf; | ||
| 53 | } | ||
| 54 | /* Make sure we do not get too many characters */ | ||
| 55 | if(--s > 0) { | ||
| 56 | *rv++ = *lf->gzBufCur; | ||
| 57 | lf->lineLength++; | ||
| 58 | if(*(lf->gzBufCur++) == '\n') { | ||
| 59 | *rv=0x00; | ||
| 60 | return(true); | ||
| 61 | } | ||
| 62 | } else { | ||
| 63 | *rv=0x00; | ||
| 64 | return(true); | ||
| 65 | } | ||
| 66 | } | ||
| 43 | for(;;) { | ||
| 44 | if(lf->gzBufCur > lf->gzBufEnd || lf->gzBufEnd == NULL) { | ||
| 45 | /* Fetch some more stuff */ | ||
| 46 | bytesRead=gzread(lf->input, lf->gzBuf, GZBUFFER); | ||
| 47 | lf->gzBufEnd=bytesRead + lf->gzBuf - 1; | ||
| 48 | /* Make sure we got something */ | ||
| 49 | if(bytesRead == 0) { | ||
| 50 | return(false); | ||
| 51 | } | ||
| 52 | lf->gzBufCur=lf->gzBuf; | ||
| 53 | } | ||
| 54 | /* Make sure we do not get too many characters */ | ||
| 55 | if(--s > 0) { | ||
| 56 | *rv++ = *lf->gzBufCur; | ||
| 57 | lf->lineLength++; | ||
| 58 | if(*(lf->gzBufCur++) == '\n') { | ||
| 59 | *rv=0x00; | ||
| 60 | return(true); | ||
| 61 | } | ||
| 62 | } else { | ||
| 63 | *rv=0x00; | ||
| 64 | return(true); | ||
| 65 | } | ||
| 66 | } | ||
| 67 | 67 | ||
| 68 | assert(NOTREACHED); | ||
| 69 | return(rv); | ||
| 68 | assert(NOTREACHED); | ||
| 69 | return(rv); | ||
| 70 | 70 | } | |
| 71 | 71 | ||
| 72 | 72 | /* Returns a value from logTypes */ | |
| 73 | 73 | static enum logType identifyLog(const char *line) { | |
| 74 | enum logType rv=UNKNOWN; | ||
| 75 | assert(line != NULL); | ||
| 74 | enum logType rv=UNKNOWN; | ||
| 75 | assert(line != NULL); | ||
| 76 | 76 | ||
| 77 | if(boost::regex_search(line, amazon_s3_regex)) { | ||
| 78 | rv=AMAZON_S3; | ||
| 79 | } else { | ||
| 80 | rv=COMMON; | ||
| 81 | } | ||
| 82 | return rv; | ||
| 77 | if(boost::regex_search(line, amazon_s3_regex)) { | ||
| 78 | rv=AMAZON_S3; | ||
| 79 | } else { | ||
| 80 | rv=COMMON; | ||
| 81 | } | ||
| 82 | return rv; | ||
| 83 | 83 | } | |
| 84 | 84 | ||
| 85 | 85 | static void outputLineS3(struct logfile *lf) { | |
| 86 | boost::cmatch what; | ||
| 86 | boost::cmatch what; | ||
| 87 | 87 | ||
| 88 | assert(lf); | ||
| 89 | assert(lf->line); | ||
| 88 | assert(lf); | ||
| 89 | assert(lf->line); | ||
| 90 | 90 | ||
| 91 | /* | ||
| 92 | // Positions as defined in the regex | ||
| 93 | S3_BUCKET 1 | ||
| 94 | S3_DATE 2 | ||
| 95 | S3_IP 3 | ||
| 96 | S3_REQ 4 | ||
| 97 | S3_STATUS 5 | ||
| 98 | S3_SIZE 6 | ||
| 99 | S3_REFER 7 | ||
| 100 | S3_UA 8 | ||
| 101 | */ | ||
| 91 | /* | ||
| 92 | // Positions as defined in the regex | ||
| 93 | S3_BUCKET 1 | ||
| 94 | S3_DATE 2 | ||
| 95 | S3_IP 3 | ||
| 96 | S3_REQ 4 | ||
| 97 | S3_STATUS 5 | ||
| 98 | S3_SIZE 6 | ||
| 99 | S3_REFER 7 | ||
| 100 | S3_UA 8 | ||
| 101 | */ | ||
| 102 | 102 | ||
| 103 | if(boost::regex_search(lf->line, what, amazon_s3_regex)) { | ||
| 104 | std::ostream_iterator<char> out(std::cout); | ||
| 105 | what.format(out, "$3 - - [$2] $4 $5 $6 $7 $8 $1\n"); | ||
| 106 | } else { | ||
| 107 | fprintf(stderr, "*** S3: Failed to match ``%s''\n", lf->line); | ||
| 108 | } | ||
| 103 | if(boost::regex_search(lf->line, what, amazon_s3_regex)) { | ||
| 104 | std::ostream_iterator<char> out(std::cout); | ||
| 105 | what.format(out, "$3 - - [$2] $4 $5 $6 $7 $8 $1\n"); | ||
| 106 | } else { | ||
| 107 | fprintf(stderr, "*** S3: Failed to match ``%s''\n", lf->line); | ||
| 108 | } | ||
| 109 | 109 | } | |
| 110 | 110 | ||
| 111 | 111 | static void outputLineDirect(struct logfile *lf) { | |
| 112 | assert(lf != NULL); | ||
| 113 | assert(lf->line != NULL); | ||
| 114 | if(fwrite(lf->line, lf->lineLength, 1, stdout) < lf->lineLength) { | ||
| 115 | perror("fwrite"); | ||
| 116 | exit(EX_IOERR); | ||
| 117 | } | ||
| 112 | assert(lf != NULL); | ||
| 113 | assert(lf->line != NULL); | ||
| 114 | if(fwrite(lf->line, lf->lineLength, 1, stdout) < lf->lineLength) { | ||
| 115 | perror("fwrite"); | ||
| 116 | exit(EX_IOERR); | ||
| 117 | } | ||
| 118 | 118 | } | |
| 119 | 119 | ||
| 120 | 120 | /** | |
| … | … | ||
| 122 | 122 | */ | |
| 123 | 123 | int openLogfile(struct logfile *lf) | |
| 124 | 124 | { | |
| 125 | int rv=ERROR; | ||
| 126 | assert(lf != NULL); | ||
| 125 | int rv=ERROR; | ||
| 126 | assert(lf != NULL); | ||
| 127 | 127 | ||
| 128 | assert(! lf->isOpen); | ||
| 128 | assert(! lf->isOpen); | ||
| 129 | 129 | ||
| 130 | fprintf(stderr, "*** Opening %s\n", lf->filename); | ||
| 130 | fprintf(stderr, "*** Opening %s\n", lf->filename); | ||
| 131 | 131 | ||
| 132 | lf->input=gzopen(lf->filename, "r"); | ||
| 132 | lf->input=gzopen(lf->filename, "r"); | ||
| 133 | 133 | ||
| 134 | if(lf->input != NULL) { | ||
| 135 | lf->isOpen=true; | ||
| 136 | rv=OK; | ||
| 137 | } | ||
| 134 | if(lf->input != NULL) { | ||
| 135 | lf->isOpen=true; | ||
| 136 | rv=OK; | ||
| 137 | } | ||
| 138 | 138 | ||
| 139 | /* Allocate the line buffer */ | ||
| 140 | lf->line=(char*)calloc(1, LINE_BUFFER); | ||
| 141 | assert(lf->line != NULL); | ||
| 142 | lf->lineLength=0; | ||
| 139 | /* Allocate the line buffer */ | ||
| 140 | lf->line=(char*)calloc(1, LINE_BUFFER); | ||
| 141 | assert(lf->line != NULL); | ||
| 142 | lf->lineLength=0; | ||
| 143 | 143 | ||
| 144 | /* Allocate the read buffer */ | ||
| 145 | lf->gzBuf=(char*)calloc(1, GZBUFFER); | ||
| 146 | assert(lf->gzBuf != NULL); | ||
| 144 | /* Allocate the read buffer */ | ||
| 145 | lf->gzBuf=(char*)calloc(1, GZBUFFER); | ||
| 146 | assert(lf->gzBuf != NULL); | ||
| 147 | 147 | ||
| 148 | lf->gzBufCur=NULL; | ||
| 149 | lf->gzBufEnd=NULL; | ||
| 148 | lf->gzBufCur=NULL; | ||
| 149 | lf->gzBufEnd=NULL; | ||
| 150 | 150 | ||
| 151 | return(rv); | ||
| 151 | return(rv); | ||
| 152 | 152 | } | |
| 153 | 153 | ||
/* Pairs a date string with an integer value */
struct date_str {
    /* The textual form */
    char *datestr;
    /* The value associated with it */
    int val;
};
| 159 | 159 | ||
| 160 | 160 | #define MONTH_JAN (((((('J'<<8)|'a')<<8)|'n')<<8)|'/') | |
| … | … | ||
| 173 | 173 | /* Convert a three character month to the numeric value */ | |
| 174 | 174 | TESTED_STATIC int parseMonth(const char *input) { | |
| 175 | 175 | int rv=-1; | |
| 176 | int inputInt=0; | ||
| 176 | int inputInt=0; | ||
| 177 | 177 | ||
| 178 | for(int i=0; i<4 && input[i]; i++) { | ||
| 179 | inputInt = (inputInt << 8) | input[i]; | ||
| 180 | } | ||
| 178 | for(int i=0; i<4 && input[i]; i++) { | ||
| 179 | inputInt = (inputInt << 8) | input[i]; | ||
| 180 | } | ||
| 181 | 181 | ||
| 182 | switch(inputInt) { | ||
| 183 | case MONTH_JAN: rv=0; break; | ||
| 184 | case MONTH_FEB: rv=1; break; | ||
| 185 | case MONTH_MAR: rv=2; break; | ||
| 186 | case MONTH_APR: rv=3; break; | ||
| 187 | case MONTH_MAY: rv=4; break; | ||
| 188 | case MONTH_JUN: rv=5; break; | ||
| 189 | case MONTH_JUL: rv=6; break; | ||
| 190 | case MONTH_AUG: rv=7; break; | ||
| 191 | case MONTH_SEP: rv=8; break; | ||
| 192 | case MONTH_OCT: rv=9; break; | ||
| 193 | case MONTH_NOV: rv=10; break; | ||
| 194 | case MONTH_DEC: rv=11; break; | ||
| 195 | } | ||
| 182 | switch(inputInt) { | ||
| 183 | case MONTH_JAN: rv=0; break; | ||
| 184 | case MONTH_FEB: rv=1; break; | ||
| 185 | case MONTH_MAR: rv=2; break; | ||
| 186 | case MONTH_APR: rv=3; break; | ||
| 187 | case MONTH_MAY: rv=4; break; | ||
| 188 | case MONTH_JUN: rv=5; break; | ||
| 189 | case MONTH_JUL: rv=6; break; | ||
| 190 | case MONTH_AUG: rv=7; break; | ||
| 191 | case MONTH_SEP: rv=8; break; | ||
| 192 | case MONTH_OCT: rv=9; break; | ||
| 193 | case MONTH_NOV: rv=10; break; | ||
| 194 | case MONTH_DEC: rv=11; break; | ||
| 195 | } | ||
| 196 | 196 | ||
| 197 | return rv; | ||
| 197 | return rv; | ||
| 198 | 198 | } | |
| 199 | 199 | ||
/* Thrown while parsing a record whose timestamp cannot be understood. */
class BadTimestamp : public std::exception {
public:
    /* Public, like the std::exception member it overrides, so the message
     * is reachable through the concrete type as well as the base class. */
    virtual const char* what() const throw() {
        return "Timestamp parse error";
    }
};
| 205 | 205 | ||
| 206 | 206 | static time_t parseTimestamp(struct logfile *lf) | |
| 207 | 207 | { | |
| 208 | char *p; | ||
| 208 | char *p; | ||
| 209 | 209 | ||
| 210 | assert(lf != NULL); | ||
| 211 | assert(lf->line != NULL); | ||
| 210 | assert(lf != NULL); | ||
| 211 | assert(lf->line != NULL); | ||
| 212 | 212 | ||
| 213 | lf->timestamp=-1; | ||
| 213 | lf->timestamp=-1; | ||
| 214 | 214 | ||
| 215 | p=lf->line; | ||
| 215 | p=lf->line; | ||
| 216 | 216 | ||
| 217 | try { | ||
| 217 | try { | ||
| 218 | 218 | ||
| 219 | /* The shortest line I can parse is about 32 characters. */ | ||
| 220 | if(lf->lineLength < 32) { | ||
| 221 | /* This is a broken entry */ | ||
| 222 | fprintf(stderr, "Broken log entry (too short): %s\n", p); | ||
| 223 | } else if(index(p, '[') != NULL) { | ||
| 224 | struct tm tm; | ||
| 225 | memset(&tm, 0x00, sizeof(tm)); | ||
| 219 | /* The shortest line I can parse is about 32 characters. */ | ||
| 220 | if(lf->lineLength < 32) { | ||
| 221 | /* This is a broken entry */ | ||
| 222 | fprintf(stderr, "Broken log entry (too short): %s\n", p); | ||
| 223 | } else if(index(p, '[') != NULL) { | ||
| 224 | struct tm tm; | ||
| 225 | memset(&tm, 0x00, sizeof(tm)); | ||
| 226 | 226 | ||
| 227 | p=index(p, '['); | ||
| 228 | /* Input validation */ | ||
| 229 | if(p == NULL || lf->lineLength < 32) { | ||
| 230 | fprintf(stderr, "invalid log line: %s\n", lf->line); | ||
| 231 | throw BadTimestamp(); | ||
| 232 | } | ||
| 227 | p=index(p, '['); | ||
| 228 | /* Input validation */ | ||
| 229 | if(p == NULL || lf->lineLength < 32) { | ||
| 230 | fprintf(stderr, "invalid log line: %s\n", lf->line); | ||
| 231 | throw BadTimestamp(); | ||
| 232 | } | ||
| 233 | 233 | ||
| 234 | /* fprintf(stderr, "**** Parsing %s\n", p); */ | ||
| 235 | p++; | ||
| 236 | tm.tm_mday=atoi(p); | ||
| 237 | p+=3; | ||
| 238 | tm.tm_mon=parseMonth(p); | ||
| 239 | p+=4; | ||
| 240 | tm.tm_year=atoi(p); | ||
| 241 | p+=5; | ||
| 242 | tm.tm_hour=atoi(p); | ||
| 243 | p+=3; | ||
| 244 | tm.tm_min=atoi(p); | ||
| 245 | p+=3; | ||
| 246 | tm.tm_sec=atoi(p); | ||
| 234 | /* fprintf(stderr, "**** Parsing %s\n", p); */ | ||
| 235 | p++; | ||
| 236 | tm.tm_mday=atoi(p); | ||
| 237 | p+=3; | ||
| 238 | tm.tm_mon=parseMonth(p); | ||
| 239 | p+=4; | ||
| 240 | tm.tm_year=atoi(p); | ||
| 241 | p+=5; | ||
| 242 | tm.tm_hour=atoi(p); | ||
| 243 | p+=3; | ||
| 244 | tm.tm_min=atoi(p); | ||
| 245 | p+=3; | ||
| 246 | tm.tm_sec=atoi(p); | ||
| 247 | 247 | ||
| 248 | /* Make sure it still looks like CLF */ | ||
| 249 | if(p[2] != ' ') { | ||
| 250 | fprintf(stderr, | ||
| 251 | "log line is starting to not look like CLF: %s\n", | ||
| 252 | lf->line); | ||
| 253 | throw BadTimestamp(); | ||
| 254 | } | ||
| 248 | /* Make sure it still looks like CLF */ | ||
| 249 | if(p[2] != ' ') { | ||
| 250 | fprintf(stderr, | ||
| 251 | "log line is starting to not look like CLF: %s\n", | ||
| 252 | lf->line); | ||
| 253 | throw BadTimestamp(); | ||
| 254 | } | ||
| 255 | 255 | ||
| 256 | tm.tm_year-=1900; | ||
| 256 | tm.tm_year-=1900; | ||
| 257 | 257 | ||
| 258 | /* Let mktime guess the timezone */ | ||
| 259 | tm.tm_isdst=-1; | ||
| 258 | /* Let mktime guess the timezone */ | ||
| 259 | tm.tm_isdst=-1; | ||
| 260 | 260 | ||
| 261 | lf->timestamp=mktime(&tm); | ||
| 261 | lf->timestamp=mktime(&tm); | ||
| 262 | 262 | ||
| 263 | } else { | ||
| 264 | fprintf(stderr, "Unknown log format: %s\n", p); | ||
| 265 | } | ||
| 263 | } else { | ||
| 264 | fprintf(stderr, "Unknown log format: %s\n", p); | ||
| 265 | } | ||
| 266 | 266 | ||
| 267 | } catch(BadTimestamp e) { | ||
| 268 | // Damn. | ||
| 269 | } | ||
| 267 | } catch(BadTimestamp e) { | ||
| 268 | // Damn. | ||
| 269 | } | ||
| 270 | 270 | ||
| 271 | if(lf->timestamp < 0) { | ||
| 272 | fprintf(stderr, "* Error parsing timestamp from %s", lf->line); | ||
| 273 | } | ||
| 271 | if(lf->timestamp < 0) { | ||
| 272 | fprintf(stderr, "* Error parsing timestamp from %s", lf->line); | ||
| 273 | } | ||
| 274 | 274 | ||
| 275 | return(lf->timestamp); | ||
| 275 | return(lf->timestamp); | ||
| 276 | 276 | } | |
| 277 | 277 | ||
| 278 | 278 | /** | |
| … | … | ||
| 281 | 281 | */ | |
| 282 | 282 | static bool nextLine(struct logfile *lf) | |
| 283 | 283 | { | |
| 284 | bool rv=false; | ||
| 284 | bool rv=false; | ||
| 285 | 285 | ||
| 286 | assert(lf != NULL); | ||
| 286 | assert(lf != NULL); | ||
| 287 | 287 | ||
| 288 | if(!lf->isOpen) { | ||
| 289 | int logfileOpened=openLogfile(lf); | ||
| 290 | /* This looks a little awkward, but it's the only way I can both | ||
| 291 | * avoid the side effect of having assert perform the task and | ||
| 292 | * not leave the variable unreferenced when assertions are off. | ||
| 293 | */ | ||
| 294 | if(logfileOpened != OK) { | ||
| 295 | assert(logfileOpened == OK); | ||
| 296 | } | ||
| 297 | /* Recurse to skip a line */ | ||
| 298 | rv=nextLine(lf); | ||
| 299 | assert(rv); | ||
| 300 | } | ||
| 288 | if(!lf->isOpen) { | ||
| 289 | int logfileOpened=openLogfile(lf); | ||
| 290 | /* This looks a little awkward, but it's the only way I can both | ||
| 291 | * avoid the side effect of having assert perform the task and | ||
| 292 | * not leave the variable unreferenced when assertions are off. | ||
| 293 | */ | ||
| 294 | if(logfileOpened != OK) { | ||
| 295 | assert(logfileOpened == OK); | ||
| 296 | } | ||
| 297 | /* Recurse to skip a line */ | ||
| 298 | rv=nextLine(lf); | ||
| 299 | assert(rv); | ||
| 300 | } | ||
| 301 | 301 | ||
| 302 | if(myGzgets(lf)) { | ||
| 303 | rv=true; | ||
| 304 | char *p=lf->line; | ||
| 305 | /* Make sure the line is short enough */ | ||
| 306 | assert(lf->lineLength < LINE_BUFFER); | ||
| 307 | /* Make sure we read a line */ | ||
| 308 | if(p[lf->lineLength-1] != '\n') { | ||
| 309 | fprintf(stderr, "*** BROKEN LOG ENTRY IN %s (no newline)\n", | ||
| 310 | lf->filename); | ||
| 311 | rv=false; | ||
| 312 | } else if(parseTimestamp(lf) == -1) { | ||
| 313 | /* If we can't parse the timestamp, give up */ | ||
| 314 | rv=false; | ||
| 315 | } | ||
| 316 | } | ||
| 302 | if(myGzgets(lf)) { | ||
| 303 | rv=true; | ||
| 304 | char *p=lf->line; | ||
| 305 | /* Make sure the line is short enough */ | ||
| 306 | assert(lf->lineLength < LINE_BUFFER); | ||
| 307 | /* Make sure we read a line */ | ||
| 308 | if(p[lf->lineLength-1] != '\n') { | ||
| 309 | fprintf(stderr, "*** BROKEN LOG ENTRY IN %s (no newline)\n", | ||
| 310 | lf->filename); | ||
| 311 | rv=false; | ||
| 312 | } else if(parseTimestamp(lf) == -1) { | ||
| 313 | /* If we can't parse the timestamp, give up */ | ||
| 314 | rv=false; | ||
| 315 | } | ||
| 316 | } | ||
| 317 | 317 | ||
| 318 | return rv; | ||
| 318 | return rv; | ||
| 319 | 319 | } | |
| 320 | 320 | ||
| 321 | 321 | static void closeLogfile(struct logfile *lf) | |
| 322 | 322 | { | |
| 323 | int gzerrno=0; | ||
| 323 | int gzerrno=0; | ||
| 324 | 324 | ||
| 325 | assert(lf != NULL); | ||
| 326 | assert(lf->input != NULL); | ||
| 327 | assert(lf->filename != NULL); | ||
| 325 | assert(lf != NULL); | ||
| 326 | assert(lf->input != NULL); | ||
| 327 | assert(lf->filename != NULL); | ||
| 328 | 328 | ||
| 329 | fprintf(stderr, "*** Closing %s\n", lf->filename); | ||
| 329 | fprintf(stderr, "*** Closing %s\n", lf->filename); | ||
| 330 | 330 | ||
| 331 | /* Free the line buffer */ | ||
| 332 | if(lf->line != NULL) { | ||
| 333 | free(lf->line); | ||
| 334 | lf->line=NULL; | ||
| 335 | } | ||
| 331 | /* Free the line buffer */ | ||
| 332 | if(lf->line != NULL) { | ||
| 333 | free(lf->line); | ||
| 334 | lf->line=NULL; | ||
| 335 | } | ||
| 336 | 336 | ||
| 337 | gzerrno=gzclose(lf->input); | ||
| 338 | if(gzerrno!=0) { | ||
| 339 | gzerror(lf->input, &gzerrno); | ||
| 340 | } | ||
| 341 | lf->isOpen=false; | ||
| 337 | gzerrno=gzclose(lf->input); | ||
| 338 | if(gzerrno!=0) { | ||
| 339 | gzerror(lf->input, &gzerrno); | ||
| 340 | } | ||
| 341 | lf->isOpen=false; | ||
| 342 | 342 | ||
| 343 | if(lf->gzBuf != NULL) { | ||
| 344 | free(lf->gzBuf); | ||
| 345 | lf->gzBuf = NULL; | ||
| 346 | } | ||
| 343 | if(lf->gzBuf != NULL) { | ||
| 344 | free(lf->gzBuf); | ||
| 345 | lf->gzBuf = NULL; | ||
| 346 | } | ||
| 347 | 347 | ||
| 348 | lf->gzBufCur=NULL; | ||
| 349 | lf->gzBufEnd=NULL; | ||
| 348 | lf->gzBufCur=NULL; | ||
| 349 | lf->gzBufEnd=NULL; | ||
| 350 | 350 | } | |
| 351 | 351 | ||
| 352 | 352 | /** | |
| … | … | ||
| 354 | 354 | */ | |
| 355 | 355 | static void destroyLogfile(struct logfile *lf) | |
| 356 | 356 | { | |
| 357 | assert(lf != NULL); | ||
| 357 | assert(lf != NULL); | ||
| 358 | 358 | ||
| 359 | fprintf(stderr, "** Destroying %s\n", lf->filename); | ||
| 359 | fprintf(stderr, "** Destroying %s\n", lf->filename); | ||
| 360 | 360 | ||
| 361 | if(lf->isOpen) { | ||
| 362 | closeLogfile(lf); | ||
| 363 | } | ||
| 361 | if(lf->isOpen) { | ||
| 362 | closeLogfile(lf); | ||
| 363 | } | ||
| 364 | 364 | ||
| 365 | /* Free the parts */ | ||
| 366 | if(lf->filename!=NULL) { | ||
| 367 | free(lf->filename); | ||
| 368 | } | ||
| 369 | if(lf->line != NULL) { | ||
| 370 | free(lf->line); | ||
| 371 | } | ||
| 372 | if(lf->gzBuf != NULL) { | ||
| 373 | free(lf->gzBuf); | ||
| 374 | } | ||
| 365 | /* Free the parts */ | ||
| 366 | if(lf->filename!=NULL) { | ||
| 367 | free(lf->filename); | ||
| 368 | } | ||
| 369 | if(lf->line != NULL) { | ||
| 370 | free(lf->line); | ||
| 371 | } | ||
| 372 | if(lf->gzBuf != NULL) { | ||
| 373 | free(lf->gzBuf); | ||
| 374 | } | ||
| 375 | 375 | ||
| 376 | /* Lastly, free the container itself. */ | ||
| 377 | free(lf); | ||
| 376 | /* Lastly, free the container itself. */ | ||
| 377 | free(lf); | ||
| 378 | 378 | } | |
| 379 | 379 | ||
| 380 | 380 | /** | |
| … | … | ||
| 382 | 382 | */ | |
| 383 | 383 | struct logfile *createLogfile(const char *filename) | |
| 384 | 384 | { | |
| 385 | struct logfile *rv=NULL; | ||
| 385 | struct logfile *rv=NULL; | ||
| 386 | 386 | ||
| 387 | rv=(struct logfile *)calloc(1, sizeof(struct logfile)); | ||
| 388 | assert(rv != NULL); | ||
| 387 | rv=(struct logfile *)calloc(1, sizeof(struct logfile)); | ||
| 388 | assert(rv != NULL); | ||
| 389 | 389 | ||
| 390 | rv->filename=(char *)strdup(filename); | ||
| 391 | assert(rv->filename != NULL); | ||
| 390 | rv->filename=(char *)strdup(filename); | ||
| 391 | assert(rv->filename != NULL); | ||
| 392 | 392 | ||
| 393 | /* Try to open the logfile */ | ||
| 394 | if(openLogfile(rv) != OK) { | ||
| 395 | destroyLogfile(rv); | ||
| 396 | rv=NULL; | ||
| 397 | } else { | ||
| 398 | /* If it's opened succesfully, read the next (first) line */ | ||
| 399 | if(!nextLine(rv)) { | ||
| 400 | /* If nextLine didn't return a record, this entry is invalid. */ | ||
| 401 | destroyLogfile(rv); | ||
| 402 | rv=NULL; | ||
| 403 | } else { | ||
| 404 | /* Otherwise, it's valid and we'll proceed, but close it. */ | ||
| 405 | switch(identifyLog(rv->line)) { | ||
| 406 | case COMMON: | ||
| 407 | fprintf(stderr, "**** %s is a common log file\n", filename); | ||
| 408 | rv->outputLine=outputLineDirect; | ||
| 409 | break; | ||
| 410 | case AMAZON_S3: | ||
| 411 | fprintf(stderr, "**** %s is an s3 log file\n", filename); | ||
| 412 | rv->outputLine=outputLineS3; | ||
| 413 | break; | ||
| 414 | case UNKNOWN: | ||
| 415 | fprintf(stderr, "! Can't identify type of %s\n", filename); | ||
| 416 | break; | ||
| 417 | default: | ||
| 418 | assert(false); | ||
| 419 | } | ||
| 393 | /* Try to open the logfile */ | ||
| 394 | if(openLogfile(rv) != OK) { | ||
| 395 | destroyLogfile(rv); | ||
| 396 | rv=NULL; | ||
| 397 | } else { | ||
| 398 | /* If it's opened succesfully, read the next (first) line */ | ||
| 399 | if(!nextLine(rv)) { | ||
| 400 | /* If nextLine didn't return a record, this entry is invalid. */ | ||
| 401 | destroyLogfile(rv); | ||
| 402 | rv=NULL; | ||
| 403 | } else { | ||
| 404 | /* Otherwise, it's valid and we'll proceed, but close it. */ | ||
| 405 | switch(identifyLog(rv->line)) { | ||
| 406 | case COMMON: | ||
| 407 | fprintf(stderr, "**** %s is a common log file\n", filename); | ||
| 408 | rv->outputLine=outputLineDirect; | ||
| 409 | break; | ||
| 410 | case AMAZON_S3: | ||
| 411 | fprintf(stderr, "**** %s is an s3 log file\n", filename); | ||
| 412 | rv->outputLine=outputLineS3; | ||
| 413 | break; | ||
| 414 | case UNKNOWN: | ||
| 415 | fprintf(stderr, "! Can't identify type of %s\n", filename); | ||
| 416 | break; | ||
| 417 | default: | ||
| 418 | assert(false); | ||
| 419 | } | ||
| 420 | 420 | ||
| 421 | if(rv->outputLine == NULL) { | ||
| 422 | destroyLogfile(rv); | ||
| 423 | rv=NULL; | ||
| 424 | } else { | ||
| 425 | closeLogfile(rv); | ||
| 426 | } | ||
| 427 | } | ||
| 428 | } | ||
| 421 | if(rv->outputLine == NULL) { | ||
| 422 | destroyLogfile(rv); | ||
| 423 | rv=NULL; | ||
| 424 | } else { | ||
| 425 | closeLogfile(rv); | ||
| 426 | } | ||
| 427 | } | ||
| 428 | } | ||
| 429 | 429 | ||
| 430 | return(rv); | ||
| 430 | return(rv); | ||
| 431 | 431 | } | |
| 432 | 432 | ||
| 433 | 433 | /** | |
| … | … | ||
| 436 | 436 | */ | |
| 437 | 437 | void skipRecord(log_queue& queue) | |
| 438 | 438 | { | |
| 439 | struct logfile *oldEntry=NULL; | ||
| 440 | assert(!queue.empty()); | ||
| 439 | struct logfile *oldEntry=NULL; | ||
| 440 | assert(!queue.empty()); | ||
| 441 | 441 | ||
| 442 | oldEntry=queue.top(); | ||
| 443 | queue.pop(); | ||
| 442 | oldEntry=queue.top(); | ||
| 443 | queue.pop(); | ||
| 444 | 444 | ||
| 445 | /* If stuff comes back, reinsert the old entry */ | ||
| 446 | if(nextLine(oldEntry)) { | ||
| 447 | queue.push(oldEntry); | ||
| 448 | } else { | ||
| 449 | destroyLogfile(oldEntry); | ||
| 450 | } | ||
| 445 | /* If stuff comes back, reinsert the old entry */ | ||
| 446 | if(nextLine(oldEntry)) { | ||
| 447 | queue.push(oldEntry); | ||
| 448 | } else { | ||
| 449 | destroyLogfile(oldEntry); | ||
| 450 | } | ||
| 451 | 451 | } |
logfiles.h
(42 / 42)
|   | |||
| 30 | 30 | ||
| 31 | 31 | extern "C" { | |
| 32 | 32 | ||
| 33 | enum logType { | ||
| 34 | COMMON, AMAZON_S3, UNKNOWN | ||
| 35 | }; | ||
| 33 | enum logType { | ||
| 34 | COMMON, AMAZON_S3, UNKNOWN | ||
| 35 | }; | ||
| 36 | 36 | ||
| 37 | /* The logfile itself */ | ||
| 38 | struct logfile { | ||
| 39 | /* The filename of this log entry */ | ||
| 40 | char *filename; | ||
| 41 | /* The current record */ | ||
| 42 | char *line; | ||
| 43 | /* Look! I know pascal! */ | ||
| 44 | size_t lineLength; | ||
| 45 | /* Function to output the current line */ | ||
| 46 | void (*outputLine)(struct logfile *); | ||
| 47 | /* The timestamp of the current record */ | ||
| 48 | time_t timestamp; | ||
| 49 | /* Indicate whether this logfile is open */ | ||
| 50 | bool isOpen; | ||
| 51 | /* Buffering for speeding up gzipped file access */ | ||
| 52 | char *gzBufCur; | ||
| 53 | char *gzBufEnd; | ||
| 54 | char *gzBuf; | ||
| 55 | /* The actual input stream being read */ | ||
| 56 | gzFile input; | ||
| 57 | }; | ||
| 37 | /* The logfile itself */ | ||
| 38 | struct logfile { | ||
| 39 | /* The filename of this log entry */ | ||
| 40 | char *filename; | ||
| 41 | /* The current record */ | ||
| 42 | char *line; | ||
| 43 | /* Look! I know pascal! */ | ||
| 44 | size_t lineLength; | ||
| 45 | /* Function to output the current line */ | ||
| 46 | void (*outputLine)(struct logfile *); | ||
| 47 | /* The timestamp of the current record */ | ||
| 48 | time_t timestamp; | ||
| 49 | /* Indicate whether this logfile is open */ | ||
| 50 | bool isOpen; | ||
| 51 | /* Buffering for speeding up gzipped file access */ | ||
| 52 | char *gzBufCur; | ||
| 53 | char *gzBufEnd; | ||
| 54 | char *gzBuf; | ||
| 55 | /* The actual input stream being read */ | ||
| 56 | gzFile input; | ||
| 57 | }; | ||
| 58 | 58 | ||
| 59 | class TimeCmp { | ||
| 60 | public: | ||
| 61 | bool operator() (const struct logfile* a, const struct logfile* b) const { | ||
| 62 | return a->timestamp > b->timestamp; | ||
| 63 | } | ||
| 64 | }; | ||
| 59 | class TimeCmp { | ||
| 60 | public: | ||
| 61 | bool operator() (const struct logfile* a, const struct logfile* b) const { | ||
| 62 | return a->timestamp > b->timestamp; | ||
| 63 | } | ||
| 64 | }; | ||
| 65 | 65 | ||
| 66 | typedef std::priority_queue<struct logfile *, | ||
| 67 | std::vector<struct logfile *>, TimeCmp> | ||
| 68 | log_queue; | ||
| 66 | typedef std::priority_queue<struct logfile *, | ||
| 67 | std::vector<struct logfile *>, TimeCmp> | ||
| 68 | log_queue; | ||
| 69 | 69 | ||
| 70 | /* Get a new logfile */ | ||
| 71 | struct logfile *createLogfile(const char *filename); | ||
| 72 | /* Skip to the next record in the list */ | ||
| 73 | void skipRecord(log_queue&); | ||
| 74 | /* Open a logfile */ | ||
| 75 | int openLogfile(struct logfile *lf); | ||
| 70 | /* Get a new logfile */ | ||
| 71 | struct logfile *createLogfile(const char *filename); | ||
| 72 | /* Skip to the next record in the list */ | ||
| 73 | void skipRecord(log_queue&); | ||
| 74 | /* Open a logfile */ | ||
| 75 | int openLogfile(struct logfile *lf); | ||
| 76 | 76 | ||
| 77 | /* Parse a month. This is generally static, but exposed when assertions are | ||
| 78 | enabled. */ | ||
| 77 | /* Parse a month. This is generally static, but exposed when assertions are | ||
| 78 | enabled. */ | ||
| 79 | 79 | #ifdef USE_ASSERT | |
| 80 | TESTED_STATIC int parseMonth(const char*); | ||
| 80 | TESTED_STATIC int parseMonth(const char*); | ||
| 81 | 81 | #endif | |
| 82 | 82 | ||
| 83 | 83 | } // extern C |
logmerge.cpp
(66 / 66)
|   | |||
/* Size of the stdout buffer in bytes (1 MiB).  Parenthesised so the macro
 * stays one value inside larger expressions such as x / STDOUT_BUF_SIZE. */
#define STDOUT_BUF_SIZE (1024*1024)
| 18 | 18 | ||
| 19 | 19 | namespace logmerge { | |
| 20 | static void initLogfiles(log_queue&, int, char **); | ||
| 21 | static void outputLogfiles(log_queue&); | ||
| 22 | static void initLogfile(log_queue&, const char*); | ||
| 20 | static void initLogfiles(log_queue&, int, char **); | ||
| 21 | static void outputLogfiles(log_queue&); | ||
| 22 | static void initLogfile(log_queue&, const char*); | ||
| 23 | 23 | } | |
| 24 | 24 | ||
| 25 | 25 | static void logmerge::initLogfile(log_queue& queue, const char *filename) { | |
| 26 | struct logfile *lf=createLogfile(filename); | ||
| 27 | if(lf!=NULL) { | ||
| 28 | queue.push(lf); | ||
| 29 | } else { | ||
| 30 | std::cerr << "Error opening logfile ``" << filename | ||
| 31 | << "''" << std::endl; | ||
| 32 | } | ||
| 26 | struct logfile *lf=createLogfile(filename); | ||
| 27 | if(lf!=NULL) { | ||
| 28 | queue.push(lf); | ||
| 29 | } else { | ||
| 30 | std::cerr << "Error opening logfile ``" << filename | ||
| 31 | << "''" << std::endl; | ||
| 32 | } | ||
| 33 | 33 | } | |
| 34 | 34 | ||
| 35 | 35 | /* Initialize all of the logfiles */ | |
| 36 | 36 | static void logmerge::initLogfiles(log_queue& queue, int argc, char **argv) | |
| 37 | 37 | { | |
| 38 | if(argc>1) { | ||
| 39 | for(int i=1; i<argc; i++) { | ||
| 40 | initLogfile(queue, argv[i]); | ||
| 41 | } | ||
| 42 | } else { | ||
| 43 | char buf[8192]; | ||
| 44 | std::cerr << "No logfiles given, accepting list from stdin" | ||
| 45 | << std::endl; | ||
| 46 | while(fgets((char*)&buf, sizeof(buf)-1, stdin)) { | ||
| 47 | buf[strlen(buf)-1]=0x00; | ||
| 48 | initLogfile(queue, buf); | ||
| 49 | } | ||
| 50 | } | ||
| 38 | if(argc>1) { | ||
| 39 | for(int i=1; i<argc; i++) { | ||
| 40 | initLogfile(queue, argv[i]); | ||
| 41 | } | ||
| 42 | } else { | ||
| 43 | char buf[8192]; | ||
| 44 | std::cerr << "No logfiles given, accepting list from stdin" | ||
| 45 | << std::endl; | ||
| 46 | while(fgets((char*)&buf, sizeof(buf)-1, stdin)) { | ||
| 47 | buf[strlen(buf)-1]=0x00; | ||
| 48 | initLogfile(queue, buf); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | 51 | } | |
| 52 | 52 | ||
| 53 | 53 | static void logmerge::outputLogfiles(log_queue& queue) | |
| 54 | 54 | { | |
| 55 | int entries=0; | ||
| 56 | struct logfile *lf=NULL; | ||
| 55 | int entries=0; | ||
| 56 | struct logfile *lf=NULL; | ||
| 57 | 57 | ||
| 58 | while(!queue.empty()) { | ||
| 59 | entries++; | ||
| 58 | while(!queue.empty()) { | ||
| 59 | entries++; | ||
| 60 | 60 | ||
| 61 | lf=queue.top(); | ||
| 62 | assert(lf!=NULL); | ||
| 63 | if(! lf->isOpen) { | ||
| 64 | openLogfile(lf); | ||
| 65 | } | ||
| 61 | lf=queue.top(); | ||
| 62 | assert(lf!=NULL); | ||
| 63 | if(! lf->isOpen) { | ||
| 64 | openLogfile(lf); | ||
| 65 | } | ||
| 66 | 66 | ||
| 67 | lf->outputLine(lf); | ||
| 68 | skipRecord(queue); | ||
| 69 | } | ||
| 67 | lf->outputLine(lf); | ||
| 68 | skipRecord(queue); | ||
| 69 | } | ||
| 70 | 70 | ||
| 71 | std::cerr << "Read " << entries << " entries" << std::endl; | ||
| 71 | std::cerr << "Read " << entries << " entries" << std::endl; | ||
| 72 | 72 | } | |
| 73 | 73 | ||
| 74 | 74 | #ifdef USE_ASSERT | |
| 75 | 75 | static void testMonthParsing() { | |
| 76 | char *months[] = { | ||
| 77 | "Jan/", "Feb/", "Mar/", "Apr/", "May/", "Jun/", | ||
| 78 | "Jul/", "Aug/", "Sep/", "Oct/", "Nov/", "Dec/" | ||
| 79 | }; | ||
| 80 | for(int i=0; i<12; i++) { | ||
| 81 | for(int j=0; j<10; j++) { | ||
| 82 | int rv=parseMonth(months[i]); | ||
| 83 | if(i != rv) { | ||
| 84 | std::cerr << "Expected " << i << " for " | ||
| 85 | << months[i] << " got " << rv << std::endl; | ||
| 86 | abort(); | ||
| 87 | } | ||
| 88 | } | ||
| 89 | } | ||
| 90 | for(int j=0; j<10; j++) { | ||
| 91 | for(int i=0; i<12; i++) { | ||
| 92 | int rv=parseMonth(months[i]); | ||
| 93 | if(i != rv) { | ||
| 94 | std::cerr << "Expected " << i << " for " | ||
| 95 | << months[i] << " got " << rv << std::endl; | ||
| 96 | abort(); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | } | ||
| 76 | char *months[] = { | ||
| 77 | "Jan/", "Feb/", "Mar/", "Apr/", "May/", "Jun/", | ||
| 78 | "Jul/", "Aug/", "Sep/", "Oct/", "Nov/", "Dec/" | ||
| 79 | }; | ||
| 80 | for(int i=0; i<12; i++) { | ||
| 81 | for(int j=0; j<10; j++) { | ||
| 82 | int rv=parseMonth(months[i]); | ||
| 83 | if(i != rv) { | ||
| 84 | std::cerr << "Expected " << i << " for " | ||
| 85 | << months[i] << " got " << rv << std::endl; | ||
| 86 | abort(); | ||
| 87 | } | ||
| 88 | } | ||
| 89 | } | ||
| 90 | for(int j=0; j<10; j++) { | ||
| 91 | for(int i=0; i<12; i++) { | ||
| 92 | int rv=parseMonth(months[i]); | ||
| 93 | if(i != rv) { | ||
| 94 | std::cerr << "Expected " << i << " for " | ||
| 95 | << months[i] << " got " << rv << std::endl; | ||
| 96 | abort(); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | } | ||
| 100 | 100 | } | |
| 101 | 101 | #else | |
| 102 | 102 | static void testMonthParsing() { | |
| … | … | ||
| 108 | 108 | */ | |
| 109 | 109 | int main(int argc, char **argv) | |
| 110 | 110 | { | |
| 111 | log_queue queue; | ||
| 111 | log_queue queue; | ||
| 112 | 112 | ||
| 113 | testMonthParsing(); | ||
| 113 | testMonthParsing(); | ||
| 114 | 114 | ||
| 115 | setvbuf(stdout, NULL, _IOFBF, STDOUT_BUF_SIZE); | ||
| 115 | setvbuf(stdout, NULL, _IOFBF, STDOUT_BUF_SIZE); | ||
| 116 | 116 | ||
| 117 | logmerge::initLogfiles(queue, argc, argv); | ||
| 118 | logmerge::outputLogfiles(queue); | ||
| 117 | logmerge::initLogfiles(queue, argc, argv); | ||
| 118 | logmerge::outputLogfiles(queue); | ||
| 119 | 119 | ||
| 120 | return(0); | ||
| 120 | return(0); | ||
| 121 | 121 | } |

