sbase

suckless unix tools
git clone git://git.suckless.org/sbase
Log | Files | Refs | README | LICENSE

tr.c (6458B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <stdlib.h>
      3 
      4 #include "utf.h"
      5 #include "util.h"
      6 
      7 static int cflag = 0;
      8 static int dflag = 0;
      9 static int sflag = 0;
     10 
     11 struct range {
     12 	Rune   start;
     13 	Rune   end;
     14 	size_t quant;
     15 };
     16 
     17 static struct {
     18 	char    *name;
     19 	int    (*check)(Rune);
     20 } classes[] = {
     21 	{ "alnum",  isalnumrune  },
     22 	{ "alpha",  isalpharune  },
     23 	{ "blank",  isblankrune  },
     24 	{ "cntrl",  iscntrlrune  },
     25 	{ "digit",  isdigitrune  },
     26 	{ "graph",  isgraphrune  },
     27 	{ "lower",  islowerrune  },
     28 	{ "print",  isprintrune  },
     29 	{ "punct",  ispunctrune  },
     30 	{ "space",  isspacerune  },
     31 	{ "upper",  isupperrune  },
     32 	{ "xdigit", isxdigitrune },
     33 };
     34 
     35 #define ISLOWERBIT 		   1U << 6
     36 #define ISUPPERBIT 		   1U << 10
     37 
     38 static struct   range *set1 = NULL;
     39 static size_t   set1ranges  = 0;
     40 static unsigned set1checks  = 0;
     41 static struct   range *set2 = NULL;
     42 static size_t   set2ranges  = 0;
     43 static unsigned set2checks  = 0;
     44 
     45 static int
     46 check(Rune rune, unsigned checks)
     47 {
     48 	size_t i;
     49 
     50 	for (i = 0; checks && i < LEN(classes); i++, checks >>= 1)
     51 		if (checks & 1 && classes[i].check(rune))
     52 			return 1;
     53 
     54 	return 0;
     55 }
     56 
     57 static size_t
     58 rangelen(struct range r)
     59 {
     60 	return (r.end - r.start + 1) * r.quant;
     61 }
     62 
     63 static size_t
     64 setlen(struct range *set, size_t setranges)
     65 {
     66 	size_t len = 0, i;
     67 
     68 	for (i = 0; i < setranges; i++)
     69 		len += rangelen(set[i]);
     70 
     71 	return len;
     72 }
     73 
     74 static int
     75 rstrmatch(Rune *r, char *s, size_t n)
     76 {
     77 	size_t i;
     78 
     79 	for (i = 0; i < n; i++)
     80 		if (r[i] != s[i])
     81 			return 0;
     82 	return 1;
     83 }
     84 
     85 static size_t
     86 makeset(char *str, struct range **set, unsigned *checks)
     87 {
     88 	Rune  *rstr;
     89 	size_t len, i, j, m, n;
     90 	size_t q, setranges = 0;
     91 	int    factor, base;
     92 
     93 	/* rstr defines at most len ranges */
     94 	unescape(str);
     95 	rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr));
     96 	len = utftorunestr(str, rstr);
     97 	*set = ereallocarray(NULL, len, sizeof(**set));
     98 
     99 	for (i = 0; i < len; i++) {
    100 		if (rstr[i] == '[') {
    101 			j = i;
    102 nextbrack:
    103 			if (j >= len)
    104 				goto literal;
    105 			for (m = j; m < len; m++)
    106 				if (rstr[m] == ']') {
    107 					j = m;
    108 					break;
    109 				}
    110 			if (j == i)
    111 				goto literal;
    112 
    113 			/* CLASSES [=EQUIV=] (skip) */
    114 			if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
    115 				if (j - i != 4)
    116 					goto literal;
    117 				(*set)[setranges].start = rstr[i + 2];
    118 				(*set)[setranges].end   = rstr[i + 2];
    119 				(*set)[setranges].quant = 1;
    120 				setranges++;
    121 				i = j;
    122 				continue;
    123 			}
    124 
    125 			/* CLASSES [:CLASS:] */
    126 			if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
    127 				for (n = 0; n < LEN(classes); n++) {
    128 					if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
    129 						*checks |= 1 << n;
    130 						i = j;
    131 						break;
    132 					}
    133 				}
    134 				if (n < LEN(classes))
    135 					continue;
    136 				eprintf("Invalid character class.\n");
    137 			}
    138 
    139 			/* REPEAT  [_*n] (only allowed in set2) */
    140 			if (j - i > 2 && rstr[i + 2] == '*') {
    141 				/* check if right side of '*' is a number */
    142 				q = 0;
    143 				factor = 1;
    144 				base = (rstr[i + 3] == '0') ? 8 : 10;
    145 				for (n = j - 1; n > i + 2; n--) {
    146 					if (rstr[n] < '0' || rstr[n] > '9') {
    147 						n = 0;
    148 						break;
    149 					}
    150 					q += (rstr[n] - '0') * factor;
    151 					factor *= base;
    152 				}
    153 				if (n == 0) {
    154 					j = m + 1;
    155 					goto nextbrack;
    156 				}
    157 				(*set)[setranges].start = rstr[i + 1];
    158 				(*set)[setranges].end   = rstr[i + 1];
    159 				(*set)[setranges].quant = q ? q : setlen(set1, MAX(set1ranges, 1));
    160 				setranges++;
    161 				i = j;
    162 				continue;
    163 			}
    164 
    165 			j = m + 1;
    166 			goto nextbrack;
    167 		}
    168 literal:
    169 		/* RANGES [_-__-_], _-__-_ */
    170 		/* LITERALS _______ */
    171 		(*set)[setranges].start = rstr[i];
    172 
    173 		if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
    174 			i += 2;
    175 		(*set)[setranges].end = rstr[i];
    176 		(*set)[setranges].quant = 1;
    177 		setranges++;
    178 	}
    179 
    180 	free(rstr);
    181 	return setranges;
    182 }
    183 
    184 static void
    185 usage(void)
    186 {
    187 	eprintf("usage: %s [-cCds] set1 [set2]\n", argv0);
    188 }
    189 
    190 int
    191 main(int argc, char *argv[])
    192 {
    193 	Rune r, lastrune = 0;
    194 	size_t off1, off2, i, m;
    195 	int ret = 0;
    196 
    197 	ARGBEGIN {
    198 	case 'c':
    199 	case 'C':
    200 		cflag = 1;
    201 		break;
    202 	case 'd':
    203 		dflag = 1;
    204 		break;
    205 	case 's':
    206 		sflag = 1;
    207 		break;
    208 	default:
    209 		usage();
    210 	} ARGEND
    211 
    212 	if (!argc || argc > 2 || (dflag == sflag && argc != 2) ||
    213 	    (dflag && argc != 1))
    214 		usage();
    215 
    216 	set1ranges = makeset(argv[0], &set1, &set1checks);
    217 	if (argc == 2) {
    218 		set2ranges = makeset(argv[1], &set2, &set2checks);
    219 		/* sanity checks as we are translating */
    220 		if (!set2ranges && !set2checks)
    221 			eprintf("cannot map to an empty set.\n");
    222 		if (set2checks && set2checks != ISLOWERBIT &&
    223 		    set2checks != ISUPPERBIT) {
    224 			eprintf("can only map to 'lower' and 'upper' class.\n");
    225 		}
    226 	}
    227 read:
    228 	if (!efgetrune(&r, stdin, "<stdin>")) {
    229 		ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
    230 		return ret;
    231 	}
    232 	if (argc == 1 && sflag)
    233 		goto write;
    234 	for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]), i++) {
    235 		if (set1[i].start <= r && r <= set1[i].end) {
    236 			if (dflag) {
    237 				if (cflag)
    238 					goto write;
    239 				else
    240 					goto read;
    241 			}
    242 			if (cflag)
    243 				goto write;
    244 
    245 			/* map r to set2 */
    246 			if (set2checks) {
    247 				if (set2checks == ISLOWERBIT)
    248 					r = tolowerrune(r);
    249 				else
    250 					r = toupperrune(r);
    251 			} else {
    252 				off1 += r - set1[i].start;
    253 				if (off1 > setlen(set2, set2ranges) - 1) {
    254 					r = set2[set2ranges - 1].end;
    255 					goto write;
    256 				}
    257 				for (m = 0, off2 = 0; m < set2ranges; m++) {
    258 					if (off2 + rangelen(set2[m]) > off1) {
    259 						m++;
    260 						break;
    261 					}
    262 					off2 += rangelen(set2[m]);
    263 				}
    264 				m--;
    265 				r = set2[m].start + (off1 - off2) / set2[m].quant;
    266 			}
    267 			goto write;
    268 		}
    269 	}
    270 	if (check(r, set1checks)) {
    271 		if (cflag)
    272 			goto write;
    273 		if (dflag)
    274 			goto read;
    275 		if (set2checks) {
    276 			if (set2checks == ISLOWERBIT)
    277 				r = tolowerrune(r);
    278 			else
    279 				r = toupperrune(r);
    280 		} else {
    281 			r = set2[set2ranges - 1].end;
    282 		}
    283 		goto write;
    284 	}
    285 	if (!dflag && cflag) {
    286 		if (set2checks) {
    287 			if (set2checks == ISLOWERBIT)
    288 				r = tolowerrune(r);
    289 			else
    290 				r = toupperrune(r);
    291 		} else {
    292 			r = set2[set2ranges - 1].end;
    293 		}
    294 		goto write;
    295 	}
    296 	if (dflag && cflag)
    297 		goto read;
    298 write:
    299 	if (argc == 1 && sflag && r == lastrune) {
    300 		if (check(r, set1checks))
    301 			goto read;
    302 		for (i = 0; i < set1ranges; i++) {
    303 			if (set1[i].start <= r && r <= set1[i].end)
    304 				goto read;
    305 		}
    306 	}
    307 	if (argc == 2 && sflag && r == lastrune) {
    308 		if (set2checks && check(r, set2checks))
    309 			goto read;
    310 		for (i = 0; i < set2ranges; i++) {
    311 			if (set2[i].start <= r && r <= set2[i].end)
    312 				goto read;
    313 		}
    314 	}
    315 	efputrune(&r, stdout, "<stdout>");
    316 	lastrune = r;
    317 	goto read;
    318 }