// base64.c
//
// Command-line utility that encodes or decodes base64 data read from a file
// or from standard input and writes the result to standard output.
// Platform detection. The macros defined here select between the system's
// writev(2) and a local emulation, and enable the Windows-specific code paths
// guarded by `WIN'.
// Test for MinGW.
#if defined(__MINGW32__) || defined(__MINGW64__)
# define MINGW
#endif
// Test for Windows.
#if defined(_WIN32) || defined(_WIN64)
# define WIN
#endif
// Decide if the writev(2) system call needs to be emulated as a series of
// write(2) calls. At least MinGW does not support writev(2).
#ifdef MINGW
# define EMULATE_WRITEV
#endif
// Include the necessary system header when using the system's writev(2).
// The feature test macro is defined here, ahead of every system header
// included by this file, so that it is in effect when those headers are read.
#ifndef EMULATE_WRITEV
# define _XOPEN_SOURCE // Unlock IOV_MAX
# include <sys/uio.h>
#endif
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>
#include <errno.h>
#include <limits.h>
// Include Windows-specific headers.
#ifdef WIN
# include <io.h>
# include <fcntl.h>
#endif
#include "../include/libbase64.h"
// Size of the buffer for the "raw" (not base64-encoded) data in bytes. This is
// also the maximum number of bytes requested from the input per fread() call,
// both when encoding and when decoding.
#define BUFFER_RAW_SIZE (1024 * 1024)
// Size of the buffer for the base64-encoded data in bytes. The base64-encoded
// data is 4/3 the size of the input, with some margin to be sure. The margin
// leaves room for the rounding of the integer division and for the padded
// final group of an encoded stream.
#define BUFFER_ENC_SIZE (BUFFER_RAW_SIZE * 4 / 3 + 16)
// Global config structure. main() fills it with defaults, parse_opts() updates
// it from the command line, and the I/O and codec helpers read it through a
// const pointer.
struct config {
// Name by which the program was called on the command line. Used as the
// prefix of every diagnostic message.
const char *name;
// Name of the input file for logging purposes.
const char *file;
// Input file handle. Either stdin or a file opened in binary mode.
FILE *fp;
// Wrap width in characters, for encoding only. A value of zero disables
// line wrapping.
size_t wrap;
// Whether to run in decode mode.
bool decode;
// Whether to just print the help text and exit.
bool print_help;
// Whether to strip newlines from the input when decoding.
bool strip_newlines;
// Whether to ignore any character not in the base64 alphabet. When set,
// this takes precedence over `strip_newlines' while decoding.
bool ignore_garbage;
};
// Input/output buffer structure. Both buffers are allocated by buffer_alloc()
// and released by buffer_free().
struct buffer {
// Runtime-allocated buffer for raw (unencoded) data, BUFFER_RAW_SIZE bytes.
char *raw;
// Runtime-allocated buffer for base64-encoded data, BUFFER_ENC_SIZE bytes.
char *enc;
};
// On platforms without writev(2), provide a minimal local replacement that is
// built on top of write(2).
#ifdef EMULATE_WRITEV

// Fallback value for IOV_MAX, which is probably not defined on such platforms.
#ifndef IOV_MAX
# define IOV_MAX 1024
#endif

// Local stand-in for the system's `struct iovec', for use in this file only.
struct iovec {
	// Opaque pointer to the start of the data.
	void *iov_base;

	// Number of bytes of data.
	size_t iov_len;
};

// Write up to `iovcnt' vectors to `fd', one write(2) call per vector.
// Returns the total number of bytes written, or the negative return value of
// the failing write(2) call. Stops early after the first partial write so
// that the caller can retry with the remainder.
static ssize_t
writev (const int fd, const struct iovec *iov, int iovcnt)
{
	ssize_t total = 0;

	// Start with a clean error marker.
	errno = 0;

	for (int i = 0; i < iovcnt; i++) {
		const struct iovec *vec = &iov[i];
		const ssize_t ret = write(fd, vec->iov_base, vec->iov_len);

		// Propagate errors back to the caller. Note that this loses
		// the information about how many vectors were successfully
		// written before the error, but that also seems to be the
		// case with the real function. The API is somewhat flawed.
		if (ret < 0) {
			return ret;
		}

		// Account for the bytes that made it out.
		total += ret;

		// A short write ends the call; the caller should retry.
		if ((size_t) ret != vec->iov_len) {
			break;
		}
	}

	return total;
}

#endif // EMULATE_WRITEV
// Allocate the raw and the encoded I/O buffers.
//
// config: used for the program name in the diagnostic message.
// buf:    receives the two freshly allocated buffers.
//
// Returns true on success. On failure, prints a diagnostic to stderr, leaves
// both pointers in `buf' set to NULL and returns false, so that a subsequent
// buffer_free() is harmless.
static bool
buffer_alloc (const struct config *config, struct buffer *buf)
{
	// Start from a known state so that a failed allocation never leaves an
	// uninitialized pointer behind in the structure.
	buf->raw = NULL;
	buf->enc = NULL;

	if ((buf->raw = malloc(BUFFER_RAW_SIZE)) != NULL &&
	    (buf->enc = malloc(BUFFER_ENC_SIZE)) != NULL) {
		return true;
	}

	// Save the error code of the failed malloc() right away: free() is
	// not guaranteed to preserve errno on every C library.
	const int err = errno;

	// At most the raw buffer was allocated. Release it and do not leave a
	// dangling pointer behind.
	free(buf->raw);
	buf->raw = NULL;

	fprintf(stderr, "%s: malloc: %s\n", config->name, strerror(err));
	return false;
}
// Release both I/O buffers held by `buf'. The pointers themselves are left
// untouched.
static void
buffer_free (struct buffer *buf)
{
	free(buf->enc);
	free(buf->raw);
}
// Write all `nvec' vectors in `iov' to stdout, retrying after interrupted and
// partial writes until everything is written or an error occurs.
//
// The vector array is modified in place: a partially written vector has its
// base pointer and length adjusted to describe the unwritten remainder.
//
// Returns true when all data was written. On error, prints a diagnostic to
// stderr and returns false.
static bool
writev_retry (const struct config *config, struct iovec *iov, size_t nvec)
{
	// An empty list is trivially written.
	if (nvec == 0) {
		return true;
	}

	for (;;) {
		const ssize_t ret = writev(1, iov, nvec);

		if (ret < 0) {
			// Anything but an interrupted call is fatal.
			if (errno != EINTR) {
				fprintf(stderr, "%s: writev: %s\n",
					config->name, strerror(errno));
				return false;
			}

			// Interrupted by a signal: simply try again.
			continue;
		}

		// `writev' reports the number of bytes written, not the number
		// of vectors. Walk the list and drop every vector that is
		// fully covered by that byte count.
		size_t done = (size_t) ret;

		while (iov->iov_len <= done) {
			done -= iov->iov_len;
			iov++;

			// Nothing left to write: success.
			if (--nvec == 0) {
				return true;
			}
		}

		// The current vector was not or only partially written. Skip
		// the part that was written and retry with the remainder.
		iov->iov_base = (char *) iov->iov_base + done;
		iov->iov_len -= done;
	}
}
// Store the buffer (`base', `len') in the next free slot of the vector array
// `iov' and bump the slot counter `*nvec'. Once the array holds IOV_MAX
// entries it is flushed to stdout and the counter is reset to zero.
//
// Returns false if the flush failed, true otherwise.
static inline bool
iov_append (const struct config *config, struct iovec *iov,
            size_t *nvec, char *base, const size_t len)
{
	struct iovec *slot = &iov[*nvec];

	slot->iov_base = base;
	slot->iov_len = len;
	*nvec += 1;

	// There is still room in the array: nothing more to do.
	if (*nvec != IOV_MAX) {
		return true;
	}

	// The array is full: write it out and start over.
	if (!writev_retry(config, iov, IOV_MAX)) {
		return false;
	}

	*nvec = 0;
	return true;
}
// Write `len' bytes from `buf' to stdout, continuing after interrupted and
// partial writes until the whole buffer is written.
//
// Returns true on success. On error, prints a diagnostic to stderr and
// returns false.
static bool
write_stdout (const struct config *config, const char *buf, size_t len)
{
	const char *pos = buf;
	size_t left = len;

	while (left != 0) {
		const ssize_t ret = write(1, pos, left);

		// Common case: skip over whatever was written.
		if (ret >= 0) {
			pos += (size_t) ret;
			left -= (size_t) ret;
			continue;
		}

		// Interrupted by a signal: try again.
		if (errno == EINTR) {
			continue;
		}

		// Every other error is fatal.
		fprintf(stderr, "%s: write: %s\n",
			config->name, strerror(errno));
		return false;
	}

	return true;
}
// Write `len' bytes of encoded data from `buf' to stdout, inserting a newline
// whenever a line reaches the configured wrap width. The current column is
// remembered across calls, so consecutive calls continue the same line.
//
// Calling this function with `buf' set to NULL ends the output: if wrapping
// is enabled and the last line is incomplete, a final newline is written.
//
// Returns true on success, false if writing to stdout failed.
static bool
write_wrapped (const struct config *config, char *buf, size_t len)
{
	// Column position on the current output line, kept between calls.
	static size_t col = 0;

	// Statically allocated IO vector array.
	static struct iovec iov[IOV_MAX];

	// A NULL buffer requests the final trailing newline.
	if (buf == NULL) {
		const bool line_open = config->wrap > 0 && col > 0;

		return line_open ? write_stdout(config, "\n", 1) : true;
	}

	// Without a wrap width, the buffer is written in one piece.
	if (config->wrap == 0) {
		return write_stdout(config, buf, len);
	}

	size_t nvec = 0;

	while (len > 0) {
		// Take what still fits on the current line, limited by the
		// amount of data that is available.
		size_t take = config->wrap - col;

		if (take > len) {
			take = len;
		}

		// Queue this piece of the buffer for writing.
		if (!iov_append(config, iov, &nvec, buf, take)) {
			return false;
		}

		// Step over the queued piece.
		buf += take;
		len -= take;
		col += take;

		// Terminate the line once it is full.
		if (col == config->wrap) {
			if (!iov_append(config, iov, &nvec, "\n", 1)) {
				return false;
			}
			col = 0;
		}
	}

	// Flush whatever is still queued.
	return writev_retry(config, iov, nvec);
}
// Encode mode: read raw data from the input file in chunks, base64-encode
// each chunk and write the wrapped result to stdout.
//
// Returns true on success. Returns false after a write error, or after a
// read error, for which a diagnostic is printed to stderr.
static bool
encode (const struct config *config, struct buffer *buf)
{
	struct base64_state state;
	size_t outlen;

	// Set up the state of the streaming encoder.
	base64_stream_encode_init(&state, 0);

	for (;;) {
		// Fetch the next chunk of raw input; stop when there is none.
		const size_t nread = fread(buf->raw, 1, BUFFER_RAW_SIZE,
		                           config->fp);
		if (nread == 0) {
			break;
		}

		// Encode the chunk and pass the result to the output stream.
		base64_stream_encode(&state, buf->raw, nread, buf->enc, &outlen);

		if (!write_wrapped(config, buf->enc, outlen)) {
			return false;
		}
	}

	// Find out whether the loop ended because of a stream error.
	if (ferror(config->fp)) {
		fprintf(stderr, "%s: %s: read error\n",
			config->name, config->file);
		return false;
	}

	// Let the encoder emit the proper stream terminators.
	base64_stream_encode_final(&state, buf->enc, &outlen);

	// Write this tail, followed by the optional trailing newline.
	return write_wrapped(config, buf->enc, outlen)
	    && write_wrapped(config, NULL, 0);
}
// Return the length of the leading run of valid base64 characters in the
// first `avail' bytes of `p'. This is also the offset of the first "garbage"
// byte, or `avail' if the input contains no garbage at all.
//
// Valid characters are the 64 characters of the standard base64 alphabet plus
// the padding character '='. The padding character must count as valid,
// because the decoder needs to see it to terminate the stream properly.
static inline size_t
find_garbage (const char *p, const size_t avail)
{
	// Use a lookup table to distinguish garbage from non-garbage. Entries
	// that are not listed are zero-initialized and thus count as garbage.
	static const char lut[256] = {
		['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1,
		['F'] = 1, ['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1,
		['K'] = 1, ['L'] = 1, ['M'] = 1, ['N'] = 1, ['O'] = 1,
		['P'] = 1, ['Q'] = 1, ['R'] = 1, ['S'] = 1, ['T'] = 1,
		['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1, ['Y'] = 1,
		['Z'] = 1,

		['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, ['e'] = 1,
		['f'] = 1, ['g'] = 1, ['h'] = 1, ['i'] = 1, ['j'] = 1,
		['k'] = 1, ['l'] = 1, ['m'] = 1, ['n'] = 1, ['o'] = 1,
		['p'] = 1, ['q'] = 1, ['r'] = 1, ['s'] = 1, ['t'] = 1,
		['u'] = 1, ['v'] = 1, ['w'] = 1, ['x'] = 1, ['y'] = 1,
		['z'] = 1,

		['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1,
		['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1,

		['+'] = 1, ['/'] = 1, ['='] = 1,
	};

	for (size_t len = 0; len < avail; len++) {
		// Index with an unsigned value: plain `char' may be signed.
		if (lut[(unsigned char) p[len]] == 0) {
			return len;
		}
	}

	return avail;
}
// Return the offset of the first newline character in the first `avail'
// bytes of `p', or `avail' if there is no newline in that range.
static inline size_t
find_newline (const char *p, const size_t avail)
{
	// Nothing to search; also avoids handing an empty range to memchr().
	if (avail == 0) {
		return 0;
	}

	// Leave the scan to the C library, which typically provides an
	// optimized implementation.
	const char *nl = memchr(p, '\n', avail);

	return (nl != NULL) ? (size_t) (nl - p) : avail;
}
// Decode mode: read base64-encoded data from the input file in chunks, decode
// each chunk and write the raw result to stdout.
//
// Depending on the configuration, characters outside the base64 alphabet or
// newline characters are skipped; otherwise each chunk that was read is
// handed to the decoder as is.
//
// Returns true on success. Returns false after a write error, or after a
// decoding or read error, for which a diagnostic is printed to stderr.
static bool
decode (const struct config *config, struct buffer *buf)
{
	struct base64_state state;

	// Set up the state of the streaming decoder.
	base64_stream_decode_init(&state, 0);

	for (;;) {
		// Read encoded data into the buffer. Use the smallest buffer
		// size to be on the safe side: the decoded output will fit the
		// raw buffer.
		size_t left = fread(buf->enc, 1, BUFFER_RAW_SIZE, config->fp);
		if (left == 0) {
			break;
		}

		const char *src = buf->enc;
		char *dst = buf->raw;

		// By popular demand, this utility tries to be bug-compatible
		// with GNU `base64'. That includes silently ignoring newlines
		// in the input. Split the input on the characters to skip.
		while (left > 0) {
			// Length of the next chunk: everything up to the next
			// garbage or newline character when those are being
			// skipped, or else all of the remaining input.
			size_t chunk = left;

			if (config->ignore_garbage) {
				chunk = find_garbage(src, left);
			} else if (config->strip_newlines) {
				chunk = find_newline(src, left);
			}

			// Only non-empty chunks are passed to the decoder.
			if (chunk > 0) {
				size_t nout;

				if (base64_stream_decode(&state, src, chunk,
				                         dst, &nout) == 0) {
					fprintf(stderr, "%s: %s: decoding error\n",
						config->name, config->file);
					return false;
				}

				// Move past the bytes that were produced.
				dst += nout;

				// Done once the chunk covers the whole rest.
				if (chunk == left) {
					break;
				}
			}

			// Step over the chunk and the skipped character.
			src += chunk + 1;
			left -= chunk + 1;
		}

		// Append the raw data to the output stream.
		if (!write_stdout(config, buf->raw, (size_t) (dst - buf->raw))) {
			return false;
		}
	}

	// Find out whether the loop ended because of a stream error.
	if (ferror(config->fp)) {
		fprintf(stderr, "%s: %s: read error\n",
			config->name, config->file);
		return false;
	}

	return true;
}
// Print the help text to the stream `fp', using the name by which the program
// was invoked.
static void
usage (FILE *fp, const struct config *config)
{
	// The format string is passed as a literal so that the compiler can
	// check it against the arguments.
	fprintf(fp,
		"Usage: %s [OPTION]... [FILE]\n"
		"If no FILE is given or is specified as '-', "
		"read from standard input.\n"
		"Options:\n"
		" -d, --decode Decode a base64 stream.\n"
		" -h, --help Print this help text.\n"
		" -i, --ignore-garbage When decoding, ignore any "
		"non-base64 data.\n"
		" -n, --no-strip-newlines When decoding, do not strip "
		"newlines. Speeds up\n"
		" decoding of inputs that do not "
		"contain newlines.\n"
		" -w, --wrap=COLS Wrap encoded lines at this "
		"column. Default 76, 0 to\n"
		" disable.\n",
		config->name);
}
// Parse the wrap width given on the command line and store it in the config.
//
// config: receives the parsed value in its `wrap' field on success.
// str:    the option argument to parse as a decimal number.
//
// Returns true if `str' is a non-negative decimal number that fits in a
// `long'. Returns false, leaving the config untouched, if the string is
// empty, negative, out of range or followed by trailing characters.
static bool
get_wrap (struct config *config, const char *str)
{
	char *eptr;

	// Reject empty strings.
	if (*str == '\0') {
		return false;
	}

	// Convert the input string to a signed long. Clear errno first,
	// because that is the only way to detect an out-of-range value.
	errno = 0;
	const long wrap = strtol(str, &eptr, 10);

	// Reject values that overflow a long; strtol() clamps these to
	// LONG_MIN or LONG_MAX instead of failing.
	if (errno == ERANGE) {
		return false;
	}

	// Reject negative numbers.
	if (wrap < 0) {
		return false;
	}

	// Reject strings with trailing characters after the number.
	if (*eptr != '\0') {
		return false;
	}

	config->wrap = (size_t) wrap;
	return true;
}
// Parse the command line into the config structure and open the input file
// if one was named.
//
// Returns true if the command line was valid; this includes the case where
// the help text was requested, in which case parsing stops right there.
// Returns false after printing a diagnostic to stderr otherwise.
static bool
parse_opts (int argc, char **argv, struct config *config)
{
	static const struct option longopts[] = {
		{ "decode",            no_argument,       NULL, 'd' },
		{ "help",              no_argument,       NULL, 'h' },
		{ "ignore-garbage",    no_argument,       NULL, 'i' },
		{ "no-strip-newlines", no_argument,       NULL, 'n' },
		{ "wrap",              required_argument, NULL, 'w' },
		{ NULL }
	};
	int opt;

	// The first argument is the name the program was called by.
	config->name = argv[0];

	// Walk over all options on the command line.
	for (;;) {
		opt = getopt_long(argc, argv, ":dhinw:", longopts, NULL);
		if (opt == -1) {
			break;
		}

		switch (opt) {
		case 'd':
			config->decode = true;
			break;

		case 'h':
			// Asking for help overrides everything else.
			config->print_help = true;
			return true;

		case 'i':
			config->ignore_garbage = true;
			break;

		case 'n':
			config->strip_newlines = false;
			break;

		case 'w':
			if (get_wrap(config, optarg)) {
				break;
			}
			fprintf(stderr,
				"%s: invalid wrap value '%s'\n",
				config->name, optarg);
			return false;

		case ':':
			fprintf(stderr, "%s: missing argument for '%c'\n",
				config->name, optopt);
			return false;

		default:
			fprintf(stderr, "%s: unknown option '%c'\n",
				config->name, optopt);
			return false;
		}
	}

	// Whatever follows the options should be at most one filename.
	const int nfiles = argc - optind;

	// Without a filename, the default input is kept.
	if (nfiles <= 0) {
		return true;
	}

	// More than one filename is an error.
	if (nfiles > 1) {
		fprintf(stderr, "%s: too many files\n", config->name);
		return false;
	}

	// For compatibility with GNU Coreutils base64, a filename of '-'
	// also means that the default input is kept.
	const char *path = argv[optind];

	if (strcmp(path, "-") == 0) {
		return true;
	}

	// Remember the name of the file and open it in binary mode.
	config->file = path;
	config->fp = fopen(config->file, "rb");

	if (config->fp == NULL) {
		fprintf(stderr, "%s: %s: %s\n",
			config->name, config->file, strerror(errno));
		return false;
	}

	return true;
}
// Program entry point: parse the command line, then encode or decode the
// input to stdout. Returns 0 on success and 1 on any failure.
int
main (int argc, char **argv)
{
	struct buffer buf;

	// Program defaults: read from stdin, encode, wrap at 76 columns.
	struct config config = {
		.fp             = stdin,
		.file           = "stdin",
		.wrap           = 76,
		.decode         = false,
		.print_help     = false,
		.ignore_garbage = false,
		.strip_newlines = true,
	};

	// Let the command line override the defaults. On failure, remind the
	// user of the proper usage.
	if (!parse_opts(argc, argv, &config)) {
		usage(stderr, &config);
		return 1;
	}

	// If only the help text was asked for, print it and be done.
	if (config.print_help) {
		usage(stdout, &config);
		return 0;
	}

	// Get the I/O buffers.
	if (!buffer_alloc(&config, &buf)) {
		return 1;
	}

#ifdef WIN
	// On Windows platforms, ensure that stdout is binary-clean, and
	// newlines at the end of the line are not silently converted to CRLFs.
	// This seems to be the portable way to do it. freopen() and
	// SetConsoleMode() occasionally result in permission errors.
	_setmode(1, _O_BINARY);
#endif

	// Run the operation that the user selected.
	bool ok;

	if (config.decode) {
		ok = decode(&config, &buf);
	} else {
		ok = encode(&config, &buf);
	}

	// Release the buffers and close the input and output streams.
	buffer_free(&buf);
	fclose(config.fp);
	fclose(stdout);

	// That's all, folks.
	return ok ? 0 : 1;
}