tr.c (6008B)
1 #include <u.h> 2 #include <libc.h> 3 4 typedef struct PCB /* Control block controlling specification parse */ 5 { 6 char *base; /* start of specification */ 7 char *current; /* current parse point */ 8 long last; /* last Rune returned */ 9 long final; /* final Rune in a span */ 10 } Pcb; 11 12 uchar bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; 13 14 #define SETBIT(a, c) ((a)[(c)/8] |= bits[(c)&07]) 15 #define CLEARBIT(a,c) ((a)[(c)/8] &= ~bits[(c)&07]) 16 #define BITSET(a,c) ((a)[(c)/8] & bits[(c)&07]) 17 18 #define MAXRUNE 0xFFFF 19 20 uchar f[(MAXRUNE+1)/8]; 21 uchar t[(MAXRUNE+1)/8]; 22 char wbuf[4096]; 23 char *wptr; 24 25 Pcb pfrom, pto; 26 27 int cflag; 28 int dflag; 29 int sflag; 30 31 void complement(void); 32 void delete(void); 33 void squeeze(void); 34 void translit(void); 35 void error(char*); 36 long canon(Pcb*); 37 char *getrune(char*, Rune*); 38 void Pinit(Pcb*, char*); 39 void Prewind(Pcb *p); 40 int readrune(int, long*); 41 void wflush(int); 42 void writerune(int, Rune); 43 44 void 45 main(int argc, char **argv) 46 { 47 ARGBEGIN{ 48 case 's': sflag++; break; 49 case 'd': dflag++; break; 50 case 'c': cflag++; break; 51 default: error("bad option"); 52 }ARGEND 53 if(argc>0) 54 Pinit(&pfrom, argv[0]); 55 if(argc>1) 56 Pinit(&pto, argv[1]); 57 if(argc>2) 58 error("arg count"); 59 if(dflag) { 60 if ((sflag && argc != 2) || (!sflag && argc != 1)) 61 error("arg count"); 62 delete(); 63 } else { 64 if (argc != 2) 65 error("arg count"); 66 if (cflag) 67 complement(); 68 else translit(); 69 } 70 exits(0); 71 } 72 73 void 74 delete(void) 75 { 76 long c, last; 77 78 if (cflag) { 79 memset((char *) f, 0xff, sizeof f); 80 while ((c = canon(&pfrom)) >= 0) 81 CLEARBIT(f, c); 82 } else { 83 while ((c = canon(&pfrom)) >= 0) 84 SETBIT(f, c); 85 } 86 if (sflag) { 87 while ((c = canon(&pto)) >= 0) 88 SETBIT(t, c); 89 } 90 91 last = 0x10000; 92 while (readrune(0, &c) > 0) { 93 if(!BITSET(f, c) && (c != last || !BITSET(t,c))) { 94 last = c; 95 writerune(1, (Rune) c); 96 } 97 } 98 wflush(1); 99 } 100 101 void 102 complement(void) 103 { 104 Rune *p; 105 int i; 106 long from, to, lastc, high; 107 108 lastc = 0; 109 high = 0; 110 while ((from = canon(&pfrom)) >= 0) { 111 if (from > high) high = from; 112 SETBIT(f, from); 113 } 114 while ((to = canon(&pto)) > 0) { 115 if (to > high) high = to; 116 SETBIT(t,to); 117 } 118 Prewind(&pto); 119 if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0) 120 error("can't allocate memory"); 121 for (i = 0; i <= high; i++){ 122 if (!BITSET(f,i)) { 123 if ((to = canon(&pto)) < 0) 124 to = lastc; 125 else lastc = to; 126 p[i] = to; 127 } 128 else p[i] = i; 129 } 130 if (sflag){ 131 lastc = 0x10000; 132 while (readrune(0, &from) > 0) { 133 if (from > high) 134 from = to; 135 else 136 from = p[from]; 137 if (from != lastc || !BITSET(t,from)) { 138 lastc = from; 139 writerune(1, (Rune) from); 140 } 141 } 142 143 } else { 144 while (readrune(0, &from) > 0){ 145 if (from > high) 146 from = to; 147 else 148 from = p[from]; 149 writerune(1, (Rune) from); 150 } 151 } 152 wflush(1); 153 } 154 155 void 156 translit(void) 157 { 158 Rune *p; 159 int i; 160 long from, to, lastc, high; 161 162 lastc = 0; 163 high = 0; 164 while ((from = canon(&pfrom)) >= 0) 165 if (from > high) high = from; 166 Prewind(&pfrom); 167 if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0) 168 error("can't allocate memory"); 169 for (i = 0; i <= high; i++) 170 p[i] = i; 171 while ((from = canon(&pfrom)) >= 0) { 172 if ((to = canon(&pto)) < 0) 173 to = lastc; 174 else lastc = to; 175 if (BITSET(f,from) && p[from] != to) 176 error("ambiguous translation"); 177 SETBIT(f,from); 178 p[from] = to; 179 SETBIT(t,to); 180 } 181 while ((to = canon(&pto)) >= 0) { 182 SETBIT(t,to); 183 } 184 if (sflag){ 185 lastc = 0x10000; 186 while (readrune(0, &from) > 0) { 187 if (from <= high) 188 from = p[from]; 189 if (from != lastc || !BITSET(t,from)) { 190 lastc = from; 191 writerune(1, (Rune) from); 192 } 193 } 194 195 } else { 196 while (readrune(0, &from) > 0) { 197 if (from <= high) 198 from = p[from]; 199 writerune(1, (Rune) from); 200 } 201 } 202 wflush(1); 203 } 204 205 int 206 readrune(int fd, long *rp) 207 { 208 Rune r; 209 int j; 210 static int i, n; 211 static char buf[4096]; 212 213 j = i; 214 for (;;) { 215 if (i >= n) { 216 wflush(1); 217 if (j != i) 218 memcpy(buf, buf+j, n-j); 219 i = n-j; 220 n = read(fd, &buf[i], sizeof(buf)-i); 221 if (n < 0) 222 error("read error"); 223 if (n == 0) 224 return 0; 225 j = 0; 226 n += i; 227 } 228 i++; 229 if (fullrune(&buf[j], i-j)) 230 break; 231 } 232 chartorune(&r, &buf[j]); 233 *rp = r; 234 return 1; 235 } 236 237 void 238 writerune(int fd, Rune r) 239 { 240 char buf[UTFmax]; 241 int n; 242 243 if (!wptr) 244 wptr = wbuf; 245 n = runetochar(buf, (Rune*)&r); 246 if (wptr+n >= wbuf+sizeof(wbuf)) 247 wflush(fd); 248 memcpy(wptr, buf, n); 249 wptr += n; 250 } 251 252 void 253 wflush(int fd) 254 { 255 if (wptr && wptr > wbuf) 256 if (write(fd, wbuf, wptr-wbuf) != wptr-wbuf) 257 error("write error"); 258 wptr = wbuf; 259 } 260 261 char * 262 getrune(char *s, Rune *rp) 263 { 264 Rune r; 265 char *save; 266 int i, n; 267 268 s += chartorune(rp, s); 269 if((r = *rp) == '\\' && *s){ 270 n = 0; 271 if (*s == 'x') { 272 s++; 273 for (i = 0; i < 4; i++) { 274 save = s; 275 s += chartorune(&r, s); 276 if ('0' <= r && r <= '9') 277 n = 16*n + r - '0'; 278 else if ('a' <= r && r <= 'f') 279 n = 16*n + r - 'a' + 10; 280 else if ('A' <= r && r <= 'F') 281 n = 16*n + r - 'A' + 10; 282 else { 283 if (i == 0) 284 *rp = 'x'; 285 else *rp = n; 286 return save; 287 } 288 } 289 } else { 290 for(i = 0; i < 3; i++) { 291 save = s; 292 s += chartorune(&r, s); 293 if('0' <= r && r <= '7') 294 n = 8*n + r - '0'; 295 else { 296 if (i == 0) 297 { 298 *rp = r; 299 return s; 300 } 301 *rp = n; 302 return save; 303 } 304 } 305 if(n > 0377) 306 error("char>0377"); 307 } 308 *rp = n; 309 } 310 return s; 311 } 312 313 long 314 canon(Pcb *p) 315 { 316 Rune r; 317 318 if (p->final >= 0) { 319 if (p->last < p->final) 320 return ++p->last; 321 p->final = -1; 322 } 323 if (*p->current == '\0') 324 return -1; 325 if(*p->current == '-' && p->last >= 0 && p->current[1]){ 326 p->current = getrune(p->current+1, &r); 327 if (r < p->last) 328 error ("Invalid range specification"); 329 if (r > p->last) { 330 p->final = r; 331 return ++p->last; 332 } 333 } 334 p->current = getrune(p->current, &r); 335 p->last = r; 336 return p->last; 337 } 338 339 void 340 Pinit(Pcb *p, char *cp) 341 { 342 p->current = p->base = cp; 343 p->last = p->final = -1; 344 } 345 void 346 Prewind(Pcb *p) 347 { 348 p->current = p->base; 349 p->last = p->final = -1; 350 } 351 void 352 error(char *s) 353 { 354 fprint(2, "%s: %s\n", argv0, s); 355 exits(s); 356 }