// unescape.c
// Strip ANSI/VT100/VT220/etc. terminal escape sequences from stdin and write the result to stdout.
// Originally created to clean up a tmux logfile.
//
// TODO beware UTF8
//
// David Poole 20220203 davep@mbuf.com
//
// https://espterm.github.io/docs/VT100%20escape%20codes.html
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
// ASCII control characters, by their byte values.
// null byte
#define NUL 0x00
// backspace
#define BS 0x08
// line feed (newline)
#define LF 0x0a
// carriage return
#define CR 0x0d
// escape: first byte of every terminal escape sequence
#define ESC 0x1b
// delete
#define DEL 0x7f
// size used for the output buffer line[]
#define MAX_LINE 1024
// Parser states for eat_escape_sequence(). Each state records what has been
// consumed so far, counting from the byte that follows the leading ESC.
typedef enum {
    STATE_INIT = 1,      // nothing consumed yet
    STATE_LEFT_BRACKET,  // consumed '['
    STATE_HASH,          // consumed '#'; exactly one more byte follows
    STATE_NUMBER,        // inside a run of digits and ';' separators
    STATE_QUESTION,      // consumed "[?"
    STATE_PAREN,         // consumed '(' or ')'; exactly one more byte follows
    STATE_EAT_1,         // swallow exactly one more byte
    STATE_DONE           // sequence fully consumed
} parse_state;

// Consume the remainder of an escape sequence from infile. The caller has
// already read the leading ESC byte.
//
// Returns the last byte consumed (the one that ended the sequence), or EOF
// if the input ran out before the sequence was complete.
//
// Recognised shapes (after ESC):
//   x                  two-byte sequence
//   # x, ( x, ) x      three-byte sequences (e.g. "ESC ( B" charset select)
//   [ x                three-byte sequence
//   [ params x         params is any run of digits and ';' (may start with ';')
//   [ ? params x
int eat_escape_sequence(FILE *infile)
{
    int n = EOF;
    parse_state state = STATE_INIT;

    while (state != STATE_DONE) {
        n = fgetc(infile);
        if (n == EOF) {
            return EOF;
        }

        // Explicit range test: strchr("1234567890", n) would also match
        // n == 0 because the string terminator counts as part of the string.
        const bool is_digit = (n >= '0' && n <= '9');
        const bool is_paren = (n == '(' || n == ')');

        switch (state) {
        case STATE_INIT:
            if (n == '[') {
                state = STATE_LEFT_BRACKET;
            }
            else if (n == '#') {
                state = STATE_HASH;
            }
            else if (is_paren) {
                // "ESC ( x" / "ESC ) x": one more byte to swallow
                state = STATE_PAREN;
            }
            else {
                // two-byte escape sequence; this byte completed it
                state = STATE_DONE;
            }
            break;

        case STATE_LEFT_BRACKET:
            if (is_digit || n == ';') {
                // ';' may lead when the first parameter is empty ("ESC [ ; H")
                state = STATE_NUMBER;
            }
            else if (n == '?') {
                state = STATE_QUESTION;
            }
            else if (is_paren) {
                state = STATE_PAREN;
            }
            else {
                // three-byte sequence; this byte completed it
                state = STATE_DONE;
            }
            break;

        case STATE_QUESTION:
            state = is_digit ? STATE_NUMBER : STATE_EAT_1;
            break;

        case STATE_EAT_1:
        case STATE_HASH:
        case STATE_PAREN:
            // the byte just read was the single trailing byte
            state = STATE_DONE;
            break;

        case STATE_NUMBER:
            if (!is_digit && n != ';') {
                // first non-parameter byte terminates the sequence
                state = STATE_DONE;
            }
            break;

        case STATE_DONE:
            break;

        default:
            assert(0);
            break;
        }
    }

    return n;
}
// Pending output characters: filled by push(), written out by dump().
// The array has one slot more than MAX_LINE; nothing in this file stores a
// terminator there, so it is simply spare room.
static int line[MAX_LINE+1];
// Number of characters currently held in line[].
static int end=0;

// Append one character to the output buffer.
//
// Returns true when the buffer should be flushed (dump() then reset())
// before the next call, false while there is still room.
//
// If the caller ignored an earlier "full" result and the buffer has reached
// MAX_LINE entries, the character is dropped rather than written past the
// end of the array, and true is returned again.
static bool push(int n)
{
    if (end >= MAX_LINE) {
        // no room left: refuse the write instead of overflowing line[]
        return true;
    }

    line[end++] = n;

    // report "full" slightly early, same threshold as before
    return (end + 1) >= MAX_LINE;
}
// Forget everything buffered so far; the next push() starts at slot 0.
static void reset(void)
{
    end = 0;
}
// Write every buffered character to stream, oldest first.
// The buffer itself is left untouched; call reset() to empty it.
static void dump(FILE *stream)
{
    const int *cursor = line;
    const int *const stop = line + end;

    while (cursor < stop) {
        fputc(*cursor, stream);
        ++cursor;
    }
}
// Remove the most recently buffered character, if there is one.
static void backspace(void)
{
    if (end <= 0) {
        // nothing buffered; nothing to erase
        return;
    }
    end -= 1;
}
// Filter stdin to stdout:
//   - escape sequences (ESC ...) are removed,
//   - carriage returns are dropped,
//   - a backspace erases the previously buffered character,
//   - everything else, including line feeds, is buffered and written out
//     whenever the buffer fills and once more at end of input.
// Input that ends in the middle of an escape sequence stops processing;
// whatever was buffered up to that point is still written.
int main(void)
{
    FILE *infile = stdin;
    FILE *outfile = stdout;
    int n;

    while ((n = fgetc(infile)) != EOF) {
        bool full = false;

        if (n == ESC) {
            if (eat_escape_sequence(infile) == EOF) {
                break;
            }
        }
        else if (n == CR) {
            // eat successive CRCRCR
        }
        else if (n == BS) {
            backspace();
        }
        else {
            // LF takes this path too, so a newline that fills the buffer
            // triggers a flush like any other character. Ignoring that
            // result let a run of LFs write past the end of the buffer.
            full = push(n);
        }

        if (full) {
            dump(outfile);
            reset();
        }
    }

    dump(outfile);
    return EXIT_SUCCESS;
}