ircd/ircd_string.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/ircd_string.c
   3  * Copyright (C) 1999 Thomas Helvey
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 1, or (at your option)
   8  * any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18  */
  19 /** @file
  20  * @brief Implementation of string operations.
  21  * @version $Id$
  22  */
  23 #include "config.h"
  24
  25 #include "ircd_string.h"
  26 #include "ircd_defs.h"
  27 #include "ircd_chattr.h"
  28 #include "ircd_log.h"
  29 #include "res.h"
  30
  31 /* #include <assert.h> -- Now using assert in ircd_log.h */
  32 #include <string.h>
  33 #include <sys/types.h>
  34 #include <netinet/in.h>
  35
  36 /*
  37  * include the character attribute tables here
  38  */
  39 #include "chattr.tab.c"
  40
  41 /** Check whether \a str contains wildcard characters.
  42  * @param[in] str String that might contain wildcards.
  43  * @return Non-zero if \a str contains naked (non-escaped) wildcards,
  44  * zero if there are none or if they are all escaped.
  45  */
  46 int string_has_wildcards(const char* str)
  47 {
  48   assert(0 != str);
  49   for ( ; *str; ++str) {
  50     if ('\\' == *str) {
  51       if ('\0' == *++str)
  52         break;
  53     }
  54     else if ('*' == *str || '?' == *str)
  55       return 1;
  56   }
  57   return 0;
  58 }
  59
  60 /** Split a string on certain delimiters.
  61  * This is a reentrant version of normal strtok().  The first call for
  62  * a particular input string must use a non-NULL \a str; *save will be
  63  * initialized based on that.  Later calls must use a NULL \a str;
  64  * *save will be updated.
  65  * @param[in,out] save Pointer to a position indicator.
  66  * @param[in] str Pointer to the input string, or NULL to continue.
  67  * @param[in] fs String that lists token delimiters.
  68  * @return Next token in input string, or NULL if no tokens remain.
  69  */
  70 char* ircd_strtok(char **save, char *str, char *fs)
  71 {
  72   char *pos = *save;            /* keep last position across calls */
  73   char *tmp;
  74
  75   if (str)
  76     pos = str;                  /* new string scan */
  77
  78   while (pos && *pos && strchr(fs, *pos) != NULL)
  79     pos++;                      /* skip leading separators */
  80
  81   if (!pos || !*pos)
  82     return (pos = *save = NULL);        /* string contains only sep's */
  83
  84   tmp = pos;                    /* now, keep position of the token */
  85
  86   while (*pos && strchr(fs, *pos) == NULL)
  87     pos++;                      /* skip content of the token */
  88
  89   if (*pos)
  90     *pos++ = '\0';              /* remove first sep after the token */
  91   else
  92     pos = NULL;                 /* end of string */
  93
  94   *save = pos;
  95   return (tmp);
  96 }
  97
  98 /** Rewrite a comma-delimited list of items to remove duplicates.
  99  * @param[in,out] buffer Comma-delimited list.
 100  * @return The input buffer \a buffer.
 101  */
 102 char* canonize(char* buffer)
 103 {
 104   static char cbuf[BUFSIZE];
 105   char*       s;
 106   char*       t;
 107   char*       cp = cbuf;
 108   int         l = 0;
 109   char*       p = NULL;
 110   char*       p2;
 111
 112   *cp = '\0';
 113
 114   for (s = ircd_strtok(&p, buffer, ","); s; s = ircd_strtok(&p, NULL, ","))
 115   {
 116     if (l)
 117     {
 118       p2 = NULL;
 119       for (t = ircd_strtok(&p2, cbuf, ","); t; t = ircd_strtok(&p2, NULL, ","))
 120         if (0 == ircd_strcmp(s, t))
 121           break;
 122         else if (p2)
 123           p2[-1] = ',';
 124     }
 125     else
 126       t = NULL;
 127     if (!t)
 128     {
 129       if (l)
 130         *(cp - 1) = ',';
 131       else
 132         l = 1;
 133       strcpy(cp, s);
 134       if (p)
 135         cp += (p - s);
 136     }
 137     else if (p2)
 138       p2[-1] = ',';
 139   }
 140   return cbuf;
 141 }
 142
 143 /** Copy one string to another, not to exceed a certain length.
 144  * @param[in] s1 Output buffer.
 145  * @param[in] s2 Source buffer.
 146  * @param[in] n Maximum number of bytes to write, plus one.
 147  * @return The original input buffer \a s1.
 148  */
 149 char* ircd_strncpy(char* s1, const char* s2, size_t n)
 150 {
 151   char* endp = s1 + n;
 152   char* s = s1;
 153
 154   assert(0 != s1);
 155   assert(0 != s2);
 156
 157   while (s < endp && (*s++ = *s2++))
 158     ;
 159   return s1;
 160 }
 161
 162
/*
 * When FORCEINLINE is not defined, the definitions of strChattr and
 * strCasediff are emitted here.  The NTL_HDR_* and NTL_SRC_* macros
 * are defined outside this file (presumably in ircd_chattr.h -- TODO
 * confirm); each pair appears to supply a function header and the
 * statements of its body.
 */
#ifndef FORCEINLINE
NTL_HDR_strChattr { NTL_SRC_strChattr }
NTL_HDR_strCasediff { NTL_SRC_strCasediff }
#endif /* !FORCEINLINE */
 167
 168 /*
 169  * Other functions visible externally
 170  */
 171
 172 /** Case insensitive string comparison.
 173  * @param[in] a First string to compare.
 174  * @param[in] b Second string to compare.
 175  * @return Less than, equal to, or greater than zero if \a a is lexicographically less than, equal to, or greater than \a b.
 176  */
 177 int ircd_strcmp(const char *a, const char *b)
 178 {
 179   const char* ra = a;
 180   const char* rb = b;
 181   while (ToLower(*ra) == ToLower(*rb)) {
 182     if (!*ra++)
 183       return 0;
 184     else
 185       ++rb;
 186   }
 187   return (ToLower(*ra) - ToLower(*rb));
 188 }
 189
 190 /** Case insensitive comparison of the starts of two strings.
 191  * @param[in] a First string to compare.
 192  * @param[in] b Second string to compare.
 193  * @param[in] n Maximum number of characters to compare.
 194  * @return Less than, equal to, or greater than zero if \a a is
 195  * lexicographically less than, equal to, or greater than \a b.
 196  */
 197 int ircd_strncmp(const char *a, const char *b, size_t n)
 198 {
 199   const char* ra = a;
 200   const char* rb = b;
 201   int left = n;
 202   if (!left--)
 203     return 0;
 204   while (ToLower(*ra) == ToLower(*rb)) {
 205     if (!*ra++ || !left--)
 206       return 0;
 207     else
 208       ++rb;
 209   }
 210   return (ToLower(*ra) - ToLower(*rb));
 211 }
 212
 213 /** Fill a vector of distinct names from a delimited input list.
 214  * Empty tokens (when \a token occurs at the start or end of \a list,
 215  * or when \a token occurs adjacent to itself) are ignored.  When
 216  * \a size tokens have been written to \a vector, the rest of the
 217  * string is ignored.
 218  * Unlike token_vector(), if a token repeats an earlier token, it is
 219  * skipped.
 220  * @param[in,out] names Input buffer.
 221  * @param[in] token Delimiter used to split \a list.
 222  * @param[out] vector Output vector.
 223  * @param[in] size Maximum number of elements to put in \a vector.
 224  * @return Number of elements written to \a vector.
 225  */
 226 int unique_name_vector(char* names, char token, char** vector, int size)
 227 {
 228   int   i;
 229   int   count = 0;
 230   char* start = names;
 231   char* end;
 232
 233   assert(0 != names);
 234   assert(0 != vector);
 235   assert(0 < size);
 236
 237   /*
 238    * ignore spurious tokens
 239    */
 240   while (token == *start)
 241     ++start;
 242
 243   for (end = strchr(start, token); end; end = strchr(start, token)) {
 244     *end++ = '\0';
 245     /*
 246      * ignore spurious tokens
 247      */
 248     while (token == *end)
 249       ++end;
 250     for (i = 0; i < count; ++i) {
 251       if (0 == ircd_strcmp(vector[i], start))
 252         break;
 253     }
 254     if (i == count) {
 255       vector[count++] = start;
 256       if (count == size)
 257         return count;
 258     }
 259     start = end;
 260   }
 261   if (*start) {
 262     for (i = 0; i < count; ++i)
 263       if (0 == ircd_strcmp(vector[i], start))
 264         return count;
 265     vector[count++] = start;
 266   }
 267   return count;
 268 }
 269
 270 /** Fill a vector of tokens from a delimited input list.
 271  * Empty tokens (when \a token occurs at the start or end of \a list,
 272  * or when \a token occurs adjacent to itself) are ignored.  When
 273  * \a size tokens have been written to \a vector, the rest of the
 274  * string is ignored.
 275  * @param[in,out] names Input buffer.
 276  * @param[in] token Delimiter used to split \a list.
 277  * @param[out] vector Output vector.
 278  * @param[in] size Maximum number of elements to put in \a vector.
 279  * @return Number of elements written to \a vector.
 280  */
 281 int token_vector(char* names, char token, char** vector, int size)
 282 {
 283   int   count = 0;
 284   char* start = names;
 285   char* end;
 286
 287   assert(0 != names);
 288   assert(0 != vector);
 289   assert(1 < size);
 290
 291   vector[count++] = start;
 292   for (end = strchr(start, token); end; end = strchr(start, token)) {
 293     *end++ = '\0';
 294     start = end;
 295     if (*start) {
 296       vector[count++] = start;
 297       if (count < size)
 298         continue;
 299     }
 300     break;
 301   }
 302   return count;
 303 }
 304
 305 /** Copy all or part of the hostname in a string to another string.
 306  * If \a userhost contains an '\@', the remaining portion is used;
 307  * otherwise, the whole \a userhost is used.
 308  * @param[out] buf Output buffer.
 309  * @param[in] userhost user\@hostname or hostname string.
 310  * @param[in] len Maximum number of bytes to write to \a host.
 311  * @return The output buffer \a buf.
 312  */
 313 char* host_from_uh(char* buf, const char* userhost, size_t len)
 314 {
 315   const char* s;
 316
 317   assert(0 != buf);
 318   assert(0 != userhost);
 319
 320   if ((s = strchr(userhost, '@')))
 321     ++s;
 322   else
 323     s = userhost;
 324   ircd_strncpy(buf, s, len);
 325   buf[len] = '\0';
 326   return buf;
 327 }
 328
/*
 * This faster inet_ntoa replacement was taken from:
 * From: Thomas Helvey <tomh@inxpress.net>
 */
/** Decimal text for every octet value, 0 through 255.
 * The table is indexed by the octet value; ircd_ntoa_r() copies
 * entries from it to build a dotted quad without doing any
 * integer-to-string conversion.
 */
static const char* IpQuadTab[] =
{
    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",
   "10",  "11",  "12",  "13",  "14",  "15",  "16",  "17",  "18",  "19",
   "20",  "21",  "22",  "23",  "24",  "25",  "26",  "27",  "28",  "29",
   "30",  "31",  "32",  "33",  "34",  "35",  "36",  "37",  "38",  "39",
   "40",  "41",  "42",  "43",  "44",  "45",  "46",  "47",  "48",  "49",
   "50",  "51",  "52",  "53",  "54",  "55",  "56",  "57",  "58",  "59",
   "60",  "61",  "62",  "63",  "64",  "65",  "66",  "67",  "68",  "69",
   "70",  "71",  "72",  "73",  "74",  "75",  "76",  "77",  "78",  "79",
   "80",  "81",  "82",  "83",  "84",  "85",  "86",  "87",  "88",  "89",
   "90",  "91",  "92",  "93",  "94",  "95",  "96",  "97",  "98",  "99",
  "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
  "110", "111", "112", "113", "114", "115", "116", "117", "118", "119",
  "120", "121", "122", "123", "124", "125", "126", "127", "128", "129",
  "130", "131", "132", "133", "134", "135", "136", "137", "138", "139",
  "140", "141", "142", "143", "144", "145", "146", "147", "148", "149",
  "150", "151", "152", "153", "154", "155", "156", "157", "158", "159",
  "160", "161", "162", "163", "164", "165", "166", "167", "168", "169",
  "170", "171", "172", "173", "174", "175", "176", "177", "178", "179",
  "180", "181", "182", "183", "184", "185", "186", "187", "188", "189",
  "190", "191", "192", "193", "194", "195", "196", "197", "198", "199",
  "200", "201", "202", "203", "204", "205", "206", "207", "208", "209",
  "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
  "220", "221", "222", "223", "224", "225", "226", "227", "228", "229",
  "230", "231", "232", "233", "234", "235", "236", "237", "238", "239",
  "240", "241", "242", "243", "244", "245", "246", "247", "248", "249",
  "250", "251", "252", "253", "254", "255"
};
 363
 364 /** Convert an IP address to printable ASCII form.
 365  * This is generally deprecated in favor of ircd_ntoa_r().
 366  * @param[in] in Address to convert.
 367  * @return Pointer to a static buffer containing the readable form.
 368  */
 369 const char* ircd_ntoa(const struct irc_in_addr* in)
 370 {
 371   static char buf[SOCKIPLEN];
 372   return ircd_ntoa_r(buf, in);
 373 }
 374
 375 /** Convert an IP address to printable ASCII form.
 376  * @param[out] buf Output buffer to write to.
 377  * @param[in] in Address to format.
 378  * @return Pointer to the output buffer \a buf.
 379  */
 380 const char* ircd_ntoa_r(char* buf, const struct irc_in_addr* in)
 381 {
 382     assert(buf != NULL);
 383     assert(in != NULL);
 384
 385     if (irc_in_addr_is_ipv4(in)) {
 386       unsigned int pos, len;
 387       unsigned char *pch;
 388
 389       pch = (unsigned char*)&in->in6_16[6];
 390       len = strlen(IpQuadTab[*pch]);
 391       memcpy(buf, IpQuadTab[*pch++], len);
 392       pos = len;
 393       buf[pos++] = '.';
 394       len = strlen(IpQuadTab[*pch]);
 395       memcpy(buf+pos, IpQuadTab[*pch++], len);
 396       pos += len;
 397       buf[pos++] = '.';
 398       len = strlen(IpQuadTab[*pch]);
 399       memcpy(buf+pos, IpQuadTab[*pch++], len);
 400       pos += len;
 401       buf[pos++] = '.';
 402       len = strlen(IpQuadTab[*pch]);
 403       memcpy(buf+pos, IpQuadTab[*pch++], len);
 404       buf[pos + len] = '\0';
 405       return buf;
 406     } else {
 407       static const char hexdigits[] = "0123456789abcdef";
 408       unsigned int pos, part, max_start, max_zeros, curr_zeros, ii;
 409
 410       /* Find longest run of zeros. */
 411       for (max_start = ii = 1, max_zeros = curr_zeros = 0; ii < 8; ++ii) {
 412         if (!in->in6_16[ii])
 413           curr_zeros++;
 414         else if (curr_zeros > max_zeros) {
 415           max_start = ii - curr_zeros;
 416           max_zeros = curr_zeros;
 417           curr_zeros = 0;
 418         }
 419       }
 420       if (curr_zeros > max_zeros) {
 421         max_start = ii - curr_zeros;
 422         max_zeros = curr_zeros;
 423       }
 424
 425       /* Print out address. */
 426 /** Append \a CH to the output buffer. */
 427 #define APPEND(CH) do { buf[pos++] = (CH); } while (0)
 428       for (pos = ii = 0; (ii < 8); ++ii) {
 429         if ((max_zeros > 0) && (ii == max_start)) {
 430           APPEND(':');
 431           ii += max_zeros - 1;
 432           continue;
 433         }
 434         part = ntohs(in->in6_16[ii]);
 435         if (part >= 0x1000)
 436           APPEND(hexdigits[part >> 12]);
 437         if (part >= 0x100)
 438           APPEND(hexdigits[(part >> 8) & 15]);
 439         if (part >= 0x10)
 440           APPEND(hexdigits[(part >> 4) & 15]);
 441         APPEND(hexdigits[part & 15]);
 442         if (ii < 7)
 443           APPEND(':');
 444       }
 445 #undef APPEND
 446
 447       /* Nul terminate and return number of characters used. */
 448       buf[pos++] = '\0';
 449       return buf;
 450     }
 451 }
 452
 453 /** Attempt to parse an IPv4 address into a network-endian form.
 454  * @param[in] input Input string.
 455  * @param[out] output Network-endian representation of the address.
 456  * @param[out] pbits Number of bits found in pbits.
 457  * @return Number of characters used from \a input, or 0 if the parse failed.
 458  */
 459 static unsigned int
 460 ircd_aton_ip4(const char *input, unsigned int *output, unsigned char *pbits)
 461 {
 462   unsigned int dots = 0, pos = 0, part = 0, ip = 0, bits;
 463
 464   /* Intentionally no support for bizarre IPv4 formats (plain
 465    * integers, octal or hex components) -- only vanilla dotted
 466    * decimal quads.
 467    */
 468   if (input[0] == '.')
 469     return 0;
 470   bits = 32;
 471   while (1) switch (input[pos]) {
 472   case '\0':
 473     if (dots < 3)
 474       return 0;
 475   out:
 476     ip |= part << (24 - 8 * dots);
 477     *output = htonl(ip);
 478     if (pbits)
 479       *pbits = bits;
 480     return pos;
 481   case '.':
 482     if (input[++pos] == '.')
 483       return 0;
 484     ip |= part << (24 - 8 * dots++);
 485     part = 0;
 486     if (input[pos] == '*') {
 487       while (input[++pos] == '*') ;
 488       if (input[pos] != '\0')
 489         return 0;
 490       if (pbits)
 491         *pbits = dots * 8;
 492       *output = htonl(ip);
 493       return pos;
 494     }
 495     break;
 496   case '/':
 497     if (!pbits || !IsDigit(input[pos + 1]))
 498       return 0;
 499     for (bits = 0; IsDigit(input[++pos]); )
 500       bits = bits * 10 + input[pos] - '0';
 501     if (bits > 32)
 502       return 0;
 503     goto out;
 504   case '0': case '1': case '2': case '3': case '4':
 505   case '5': case '6': case '7': case '8': case '9':
 506     part = part * 10 + input[pos++] - '0';
 507     if (part > 255)
 508       return 0;
 509     break;
 510   default:
 511     return 0;
 512   }
 513 }
 514
/** Parse a numeric IPv4 or IPv6 address into an irc_in_addr.
 * \a ip is zeroed before parsing.  Three input shapes are handled:
 * an IPv6 address (a ':' appears and no '.' precedes it), possibly
 * with "::", an embedded dotted quad, a "/bits" suffix or a trailing
 * '*'; an IPv4 address or mask, which is stored with word 5 set to
 * 0xffff and the address in words 6 and 7; and a string made only of
 * '*' characters, which yields a mask length of zero.
 * @param[in] input Input buffer.
 * @param[out] ip Receives parsed IP address.
 * @param[out] pbits If non-NULL, receives number of bits specified in address mask.
 * @return Number of characters used from \a input, or 0 if the
 * address was unparseable or malformed.
 */
int
ipmask_parse(const char *input, struct irc_in_addr *ip, unsigned char *pbits)
{
  char *colon;
  char *dot;

  assert(ip);
  assert(input);
  memset(ip, 0, sizeof(*ip));
  colon = strchr(input, ':');
  dot = strchr(input, '.');

  /* Treat the input as IPv6 when its first ':' comes before any '.'. */
  if (colon && (!dot || (dot > colon))) {
    /* part: word being accumulated; ii: next word index to fill;
     * colon (shadows the outer pointer): word index where "::" was
     * seen, or 8 if it has not been seen.
     */
    unsigned int part = 0, pos = 0, ii = 0, colon = 8;
    /* Start of the current component, used if it turns out to be the
     * beginning of an embedded dotted quad.
     */
    const char *part_start = NULL;

    /* Parse IPv6, possibly like ::127.0.0.1.
     * This is pretty straightforward; the only trick is borrowed
     * from Paul Vixie (BIND): when it sees a "::" continue as if
     * it were a single ":", but note where it happened, and fill
     * with zeros afterward.
     */
    if (input[pos] == ':') {
      /* A leading ':' must be exactly "::" -- not ":" alone or ":::". */
      if ((input[pos+1] != ':') || (input[pos+2] == ':'))
        return 0;
      colon = 0;
      pos += 2;
      part_start = input + pos;
    }
    while (ii < 8) switch (input[pos]) {
      unsigned char chval;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      chval = input[pos] - '0';
    use_chval:
      /* Shift the hex digit into the current word; reject overflow. */
      part = (part << 4) | chval;
      if (part > 0xffff)
        return 0;
      pos++;
      break;
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
      chval = input[pos] - 'A' + 10;
      goto use_chval;
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
      chval = input[pos] - 'a' + 10;
      goto use_chval;
    case ':':
      /* End of a word: store it and start the next component. */
      part_start = input + ++pos;
      if (input[pos] == '.')
        return 0;
      ip->in6_16[ii++] = htons(part);
      part = 0;
      if (input[pos] == ':') {
        /* "::" may appear only once. */
        if (colon < 8)
          return 0;
        colon = ii;
        pos++;
      }
      break;
    case '.': {
      /* The current component is really the start of a dotted quad:
       * re-parse it from part_start as IPv4.  It fills two words, so
       * at most six may have been stored already.
       */
      uint32_t ip4;
      unsigned int len;
      len = ircd_aton_ip4(part_start, &ip4, pbits);
      if (!len || (ii > 6))
        return 0;
      ip->in6_16[ii++] = htons(ntohl(ip4) >> 16);
      ip->in6_16[ii++] = htons(ntohl(ip4) & 65535);
      /* ircd_aton_ip4() reported an IPv4 mask length; rebase it to
       * the 128-bit address.
       */
      if (pbits)
        *pbits += 96;
      pos = part_start + len - input;
      goto finish;
    }
    case '/':
      /* Mask length suffix: needs somewhere to store it and at least
       * one digit.  The pending word is stored first, then the
       * decimal length (at most 128) is read.
       */
      if (!pbits || !IsDigit(input[pos + 1]))
        return 0;
      ip->in6_16[ii++] = htons(part);
      for (part = 0; IsDigit(input[++pos]); )
        part = part * 10 + input[pos] - '0';
      if (part > 128)
        return 0;
      *pbits = part;
      goto finish;
    case '*':
      /* Wildcard tail: only '*' may follow, and it cannot be combined
       * with "::".  The mask covers the words stored so far.
       */
      while (input[++pos] == '*') ;
      if (input[pos] != '\0' || colon < 8)
        return 0;
      if (pbits)
        *pbits = ii * 16;
      return pos;
    case '\0':
      /* End of input: store the last word.  Without a "::" all eight
       * words must have been given.
       */
      ip->in6_16[ii++] = htons(part);
      if (colon == 8 && ii < 8)
        return 0;
      if (pbits)
        *pbits = 128;
      goto finish;
    default:
      return 0;
    }
  finish:
    if (colon < 8) {
      unsigned int jj;
      /* Shift stuff after "::" up and fill middle with zeros. */
      for (jj = 0; jj < ii - colon; jj++)
        ip->in6_16[7 - jj] = ip->in6_16[ii - jj - 1];
      for (jj = 0; jj < 8 - ii; jj++)
        ip->in6_16[colon + jj] = 0;
    }
    return pos;
  } else if (dot || strchr(input, '/')) {
    /* IPv4 address or mask: store it with word 5 set to 0xffff and
     * the address in the last two words.
     */
    unsigned int addr;
    int len = ircd_aton_ip4(input, &addr, pbits);
    if (len) {
      ip->in6_16[5] = htons(65535);
      ip->in6_16[6] = htons(ntohl(addr) >> 16);
      ip->in6_16[7] = htons(ntohl(addr) & 65535);
      /* Rebase the IPv4 mask length to the 128-bit address. */
      if (pbits)
        *pbits += 96;
    }
    return len;
  } else if (input[0] == '*') {
    /* A string of nothing but '*': zero mask bits, address left zeroed. */
    unsigned int pos = 0;
    while (input[++pos] == '*') ;
    if (input[pos] != '\0')
      return 0;
    if (pbits)
      *pbits = 0;
    return pos;
  } else return 0; /* parse failed */
}