/*
 * split.c
 * (source listing from the pwlib package)
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 *
 * The string is split IN PLACE: the separator that ends each stored field
 * is overwritten with '\0', and fields[] receives pointers into string.
 *
 * sep selects the splitting rule:
 *    ""    white space: fields are separated by runs of blanks and tabs,
 *          and leading and trailing white space is ignored
 *    "c"   every occurrence of c ends a field, so empty fields can occur
 *    "ab"  (two or more characters) any run made of the listed characters
 *          is one separator
 *
 * At most nfields pointers are stored and fields[] is not NULL-terminated.
 * If the string holds more fields than that, the last stored field keeps
 * the unsplit remainder of the string and the extra fields are only
 * counted.  In white-space mode the trailing white space of that last
 * field is removed only when nothing but white space follows its first
 * word.
 *
 * With nfields <= 0 nothing is stored, the string is left untouched, and
 * the fields are merely counted.
 *
 * Returns the number of fields found, which may exceed nfields; an empty
 * string (or, in white-space mode, an all-white one) yields 0.
 */
int                     /* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
      char *p = string;
      char c;                       /* latest character */
      char sepc = sep[0];
      char sepc2;
      int fn;                       /* number of fields seen so far */
      char **fp = fields;
      char *start;                  /* start of the field being scanned */
      char *sepp;
      int trimtrail;

      /* a non-positive capacity means "count only" */
      if (nfields < 0)
            nfields = 0;

      /* white space */
      if (sepc == '\0') {
            while ((c = *p++) == ' ' || c == '\t')
                  continue;
            p--;
            trimtrail = 1;
            sep = " \t";      /* note, code below knows this is 2 long */
            sepc = ' ';
      } else {
            trimtrail = 0;
      }
      sepc2 = sep[1];         /* now we can safely pick this up */

      /* catch empties */
      if (*p == '\0')
            return(0);

      /* single separator */
      if (sepc2 == '\0') {
            fn = 0;
            for (;;) {
                  if (fn < nfields)
                        *fp++ = p;
                  fn++;
                  while ((c = *p++) != sepc) {
                        if (c == '\0')
                              return(fn);
                  }
                  /*
                   * Terminate the field only if another slot follows;
                   * the last slot keeps the rest of the string.
                   */
                  if (fn < nfields)
                        *(p-1) = '\0';
            }
            /* not reached */
      }

      /* two separators */
      if (sep[2] == '\0') {
            fn = 0;
            for (;;) {
                  start = p;
                  if (fn < nfields)
                        *fp++ = p;
                  fn++;
                  while ((c = *p++) != sepc && c != sepc2) {
                        if (c == '\0') {
                              /* white-space mode ignores an empty last field */
                              if (trimtrail && *start == '\0')
                                    fn--;
                              return(fn);
                        }
                  }
                  if (fn >= nfields)
                        break;
                  *(p-1) = '\0';
                  while ((c = *p++) == sepc || c == sepc2)
                        continue;
                  p--;
            }
            /*
             * No room left in the fields vector -- just count the rest.
             * Here c is the separator that ended the first word of the
             * last field counted, so the loop runs at least once.
             */
            while (c != '\0') {
                  while ((c = *p++) == sepc || c == sepc2)
                        continue;
                  p--;
                  fn++;
                  while ((c = *p++) != '\0' && c != sepc && c != sepc2)
                        continue;
            }
            /* might have to trim trailing white space */
            if (trimtrail) {
                  p--;        /* back onto the terminating NUL */
                  while ((c = *--p) == sepc || c == sepc2)
                        continue;
                  p++;
                  if (*p != '\0') {
                        /*
                         * The string ended in white space, so the last
                         * "field" counted was empty.  If it was the only
                         * overflow, the white space belongs to the last
                         * stored field and is cut off.
                         */
                        if (nfields > 0 && fn == nfields+1)
                              *p = '\0';
                        fn--;
                  }
            }
            return(fn);
      }

      /* n separators */
      fn = 0;
      for (;;) {
            if (fn < nfields)
                  *fp++ = p;
            fn++;
            /* scan to the end of the field */
            for (;;) {
                  c = *p++;
                  if (c == '\0')
                        return(fn);
                  sepp = sep;
                  while ((sepc = *sepp++) != '\0' && sepc != c)
                        continue;
                  if (sepc != '\0') /* it was a separator */
                        break;
            }
            if (fn < nfields)
                  *(p-1) = '\0';
            /* skip the rest of the separator run */
            for (;;) {
                  c = *p++;
                  sepp = sep;
                  while ((sepc = *sepp++) != '\0' && sepc != c)
                        continue;
                  if (sepc == '\0') /* it wasn't a separator */
                        break;
            }
            p--;
      }

      /* not reached */
}

#ifdef TEST_SPLIT


/*
 * test program
 * pgm                  runs regression
 * pgm sep              splits stdin lines by sep
 * pgm str sep          splits str by sep
 * pgm str sep n        splits str by sep n times
 * pgm str sep n x      (any fifth argument) copies str n times without
 *                      splitting -- presumably a baseline for timing the
 *                      previous form
 */

/* defined later in this file */
int dosplit(char *string, char *seps);
int regress(void);

int
main(int argc, char *argv[])
{
      char buf[512];
      int n;
#     define      MNF   10
      char *fields[MNF];

      /* the repeat modes copy argv[1] into buf; refuse what cannot fit */
      if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
            fprintf(stderr, "%s: string too long (limit %lu characters)\n",
                  argv[0], (unsigned long)(sizeof(buf) - 1));
            return 2;
      }

      if (argc > 4) {
            /* copy only, no split */
            for (n = atoi(argv[3]); n > 0; n--)
                  (void) strcpy(buf, argv[1]);
      } else if (argc > 3) {
            /* split() modifies its input, so start from a fresh copy */
            for (n = atoi(argv[3]); n > 0; n--) {
                  (void) strcpy(buf, argv[1]);
                  (void) split(buf, fields, MNF, argv[2]);
            }
      } else if (argc > 2) {
            (void) dosplit(argv[1], argv[2]);
      } else if (argc > 1) {
            while (fgets(buf, sizeof(buf), stdin) != NULL) {
                  /*
                   * Stomp the newline if there is one; a final line
                   * without a newline, or an over-long line, keeps all
                   * its characters.
                   */
                  buf[strcspn(buf, "\n")] = '\0';
                  (void) dosplit(buf, argv[1]);
            }
      } else {
            (void) regress();
      }

      return 0;
}

/* defined later in this file */
int print(int nf, int nfp, char *fields[]);

/*
 * dosplit - split one string into at most NF fields and print the result
 *
 * string is modified in place by split().  Returns the field count that
 * split() reported.
 */
int
dosplit(char *string, char *seps)
{
#     define      NF    5
      char *fields[NF];
      int nf;

      nf = split(string, fields, NF, seps);
      (void) print(nf, NF, fields);
      return nf;
}

/*
 * print - show the outcome of a split() call on stdout
 *
 * nf is the field count split() returned, nfp the capacity of fields[].
 * Prints "nf:<tab>" followed by the stored fields, quoted and separated
 * by ", ".  Only the first min(nf, nfp) entries of fields[] are read.
 * When nf exceeds nfp the list ends with a dangling ", " to show that
 * more fields existed than could be stored.  The line is always ended
 * with a newline.
 *
 * Returns the number of fields printed.
 */
int
print(int nf, int nfp, char *fields[])
{
      int fn;
      int bound;

      bound = (nf > nfp) ? nfp : nf;
      printf("%d:\t", nf);
      for (fn = 0; fn < bound; fn++)
            printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n");
      /*
       * The loop emits the newline only after field number nf; with no
       * fields at all, or with overflow, the line is still open here.
       */
      if (bound <= 0 || bound < nf)
            putchar('\n');
      return fn;
}

#define RNF 5     /* capacity used by regress(); some table entries know this */

/*
 * Regression table for split().  Each entry gives the input string, the
 * separator specification, the field count split() must return, and the
 * expected contents of the (at most RNF) stored fields.  An entry whose
 * str is NULL ends the table.
 */
struct {
      char *str;        /* string to split */
      char *seps;       /* separator specification */
      int nf;           /* expected return value */
      char *fi[RNF];    /* expected stored fields */
} tests[] = {
      /* single separator: blank */
      { "",                   " ",    0, { "" } },
      { " ",                  " ",    2, { "", "" } },
      { "x",                  " ",    1, { "x" } },
      { "xy",                 " ",    1, { "xy" } },
      { "x y",                " ",    2, { "x", "y" } },
      { "abc def  g ",        " ",    5, { "abc", "def", "", "g", "" } },
      { "  a bcd",            " ",    4, { "", "", "a", "bcd" } },
      { "a b c d e f",        " ",    6, { "a", "b", "c", "d", "e f" } },
      { " a b c d ",          " ",    6, { "", "a", "b", "c", "d " } },

      /* two separators: blank and underscore */
      { "",                   " _",   0, { "" } },
      { " ",                  " _",   2, { "", "" } },
      { "x",                  " _",   1, { "x" } },
      { "x y",                " _",   2, { "x", "y" } },
      { "ab _ cd",            " _",   2, { "ab", "cd" } },
      { " a_b  c ",           " _",   5, { "", "a", "b", "c", "" } },
      { "a b c_d e f",        " _",   6, { "a", "b", "c", "d", "e f" } },
      { " a b c d ",          " _",   6, { "", "a", "b", "c", "d " } },

      /* three separators */
      { "",                   " _~",  0, { "" } },
      { " ",                  " _~",  2, { "", "" } },
      { "x",                  " _~",  1, { "x" } },
      { "x y",                " _~",  2, { "x", "y" } },
      { "ab _~ cd",           " _~",  2, { "ab", "cd" } },
      { " a_b  c~",           " _~",  5, { "", "a", "b", "c", "" } },
      { "a b_c d~e f",        " _~",  6, { "a", "b", "c", "d", "e f" } },
      { "~a b c d ",          " _~",  6, { "", "a", "b", "c", "d " } },

      /* four separators */
      { "",                   " _~-", 0, { "" } },
      { " ",                  " _~-", 2, { "", "" } },
      { "x",                  " _~-", 1, { "x" } },
      { "x y",                " _~-", 2, { "x", "y" } },
      { "ab _~- cd",          " _~-", 2, { "ab", "cd" } },
      { " a_b  c~",           " _~-", 5, { "", "a", "b", "c", "" } },
      { "a b_c-d~e f",        " _~-", 6, { "a", "b", "c", "d", "e f" } },
      { "~a-b c d ",          " _~-", 6, { "", "a", "b", "c", "d " } },

      /* two separators, both blank */
      { "",                   "  ",   0, { "" } },
      { " ",                  "  ",   2, { "", "" } },
      { "x",                  "  ",   1, { "x" } },
      { "xy",                 "  ",   1, { "xy" } },
      { "x y",                "  ",   2, { "x", "y" } },
      { "abc def  g ",        "  ",   4, { "abc", "def", "g", "" } },
      { "  a bcd",            "  ",   3, { "", "a", "bcd" } },
      { "a b c d e f",        "  ",   6, { "a", "b", "c", "d", "e f" } },
      { " a b c d ",          "  ",   6, { "", "a", "b", "c", "d " } },

      /* empty separator string: white-space mode */
      { "",                   "",     0, { "" } },
      { " ",                  "",     0, { "" } },
      { "x",                  "",     1, { "x" } },
      { "xy",                 "",     1, { "xy" } },
      { "x y",                "",     2, { "x", "y" } },
      { "abc def  g ",        "",     3, { "abc", "def", "g" } },
      { "\t a bcd",           "",     2, { "a", "bcd" } },
      { "  a \tb\t c ",       "",     3, { "a", "b", "c" } },
      { "a b c d e ",         "",     5, { "a", "b", "c", "d", "e" } },
      { "a b\tc d e f",       "",     6, { "a", "b", "c", "d", "e f" } },
      { " a b c d e f ",      "",     6, { "a", "b", "c", "d", "e f " } },

      /* end marker */
      { NULL,                 NULL,   0, { NULL } },
};

/*
 * regress - run split() over every entry of tests[] and report mismatches
 *
 * Each string is copied into a scratch buffer (split() modifies its
 * input) and split into at most RNF fields.  A complaint is printed on
 * stdout when the field count is wrong, when split() wrote past the RNF
 * slots it was given, or when a stored field differs from the expected
 * one; the actual result is then shown with print().
 *
 * Returns the number of table entries that failed.
 */
int
regress(void)
{
      char buf[512];
      char *fields[RNF+1];    /* one extra slot as an overrun sentinel */
      char *f;
      int failures = 0;
      int n;
      int nf;
      int i;
      int printit;

      for (n = 0; tests[n].str != NULL; n++) {
            (void) strcpy(buf, tests[n].str);
            fields[RNF] = NULL;
            nf = split(buf, fields, RNF, tests[n].seps);
            printit = 0;
            if (nf != tests[n].nf) {
                  printf("split `%s' by `%s' gave %d fields, not %d\n",
                        tests[n].str, tests[n].seps, nf, tests[n].nf);
                  printit = 1;
            } else if (fields[RNF] != NULL) {
                  printf("split() went beyond array end\n");
                  printit = 1;
            } else {
                  for (i = 0; i < nf && i < RNF; i++) {
                        f = fields[i];
                        if (f == NULL)
                              f = "(NULL)";
                        if (strcmp(f, tests[n].fi[i]) != 0) {
                              /* print f, not fields[i], which may be NULL */
                              printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
                                    tests[n].str, tests[n].seps,
                                    i, f, tests[n].fi[i]);
                              printit = 1;
                        }
                  }
            }
            if (printit) {
                  failures++;
                  (void) print(nf, RNF, fields);
            }
      }
      return failures;
}
#endif

/* Listing generated by Doxygen 1.6.0 */