split.c (3257B)
1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <ctype.h> 5 #include <regexp.h> 6 7 char digit[] = "0123456789"; 8 char *suffix = ""; 9 char *stem = "x"; 10 char suff[] = "aa"; 11 char name[200]; 12 Biobuf bout; 13 Biobuf *output = &bout; 14 15 extern int nextfile(void); 16 extern int matchfile(Resub*); 17 extern void openf(void); 18 extern char *fold(char*,int); 19 extern void usage(void); 20 extern void badexp(void); 21 22 void 23 main(int argc, char *argv[]) 24 { 25 Reprog *exp; 26 char *pattern = 0; 27 int n = 1000; 28 char *line; 29 int xflag = 0; 30 int iflag = 0; 31 Biobuf bin; 32 Biobuf *b = &bin; 33 char buf[256]; 34 35 ARGBEGIN { 36 case 'l': 37 case 'n': 38 n=atoi(EARGF(usage())); 39 break; 40 case 'e': 41 pattern = strdup(EARGF(usage())); 42 break; 43 case 'f': 44 stem = strdup(EARGF(usage())); 45 break; 46 case 's': 47 suffix = strdup(EARGF(usage())); 48 break; 49 case 'x': 50 xflag++; 51 break; 52 case 'i': 53 iflag++; 54 break; 55 default: 56 usage(); 57 break; 58 59 } ARGEND; 60 61 if(argc < 0 || argc > 1) 62 usage(); 63 64 if(argc != 0) { 65 b = Bopen(argv[0], OREAD); 66 if(b == nil) { 67 fprint(2, "split: can't open %s: %r\n", argv[0]); 68 exits("open"); 69 } 70 } else 71 Binit(b, 0, OREAD); 72 73 if(pattern) { 74 if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern))) 75 badexp(); 76 while((line=Brdline(b,'\n')) != 0) { 77 Resub match[2]; 78 memset(match, 0, sizeof match); 79 line[Blinelen(b)-1] = 0; 80 if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) { 81 if(matchfile(match) && xflag) 82 continue; 83 } else if(output == 0) 84 nextfile(); /* at most once */ 85 Bwrite(output, line, Blinelen(b)-1); 86 Bputc(output, '\n'); 87 } 88 } else { 89 int linecnt = n; 90 91 while((line=Brdline(b,'\n')) != 0) { 92 if(++linecnt > n) { 93 nextfile(); 94 linecnt = 1; 95 } 96 Bwrite(output, line, Blinelen(b)); 97 } 98 99 /* 100 * in case we didn't end with a newline, tack whatever's 101 * left onto the last file 102 */ 103 while((n = Bread(b, buf, sizeof(buf))) > 0) 104 Bwrite(output, buf, n); 105 } 106 if(b != nil) 107 Bterm(b); 108 exits(0); 109 } 110 111 int 112 nextfile(void) 113 { 114 static int canopen = 1; 115 if(suff[0] > 'z') { 116 if(canopen) 117 fprint(2, "split: file %szz not split\n",stem); 118 canopen = 0; 119 } else { 120 strcpy(name, stem); 121 strcat(name, suff); 122 if(++suff[1] > 'z') 123 suff[1] = 'a', ++suff[0]; 124 openf(); 125 } 126 return canopen; 127 } 128 129 int 130 matchfile(Resub *match) 131 { 132 if(match[1].s.sp) { 133 int len = match[1].e.ep - match[1].s.sp; 134 strncpy(name, match[1].s.sp, len); 135 strcpy(name+len, suffix); 136 openf(); 137 return 1; 138 } 139 return nextfile(); 140 } 141 142 void 143 openf(void) 144 { 145 static int fd = 0; 146 Bflush(output); 147 Bterm(output); 148 if(fd > 0) 149 close(fd); 150 fd = create(name,OWRITE,0666); 151 if(fd < 0) { 152 fprint(2, "grep: can't create %s: %r\n", name); 153 exits("create"); 154 } 155 Binit(output, fd, OWRITE); 156 } 157 158 char * 159 fold(char *s, int n) 160 { 161 static char *fline; 162 static int linesize = 0; 163 char *t; 164 165 if(linesize < n+1){ 166 fline = realloc(fline,n+1); 167 linesize = n+1; 168 } 169 for(t=fline; *t++ = tolower((uchar)*s++); ) 170 continue; 171 /* we assume the 'A'-'Z' only appear as themselves 172 * in a utf encoding. 173 */ 174 return fline; 175 } 176 177 void 178 usage(void) 179 { 180 fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n"); 181 exits("usage"); 182 } 183 184 void 185 badexp(void) 186 { 187 fprint(2, "split: bad regular expression\n"); 188 exits("bad regular expression"); 189 }