ircd/ircd_string.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/ircd_string.c
   3  * Copyright (C) 1999 Thomas Helvey
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 1, or (at your option)
   8  * any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18  *
  19  * $Id$
  20  */
  21 #include "config.h"
  22
  23 #include "ircd_string.h"
  24 #include "ircd_defs.h"
  25 #include "ircd_chattr.h"
  26 #include "ircd_log.h"
  27 #include "res.h"
  28
  29 #include <assert.h>
  30 #include <string.h>
  31 #include <regex.h>
  32 #include <sys/types.h>
  33 #include <netinet/in.h>
  34
  35 /*
  36  * include the character attribute tables here
  37  */
  38 #include "chattr.tab.c"
  39
  40
/*
 * A hostname label may contain only characters from [-a-zA-Z0-9].
 * The hostname may not start or end with a '.'.
 * A label may not end with a '-'; the maximum length of a label is
 * 63 characters (note that hostExpr below does not check label length).
 * On top of that (which seems to be the RFC) we demand that the
 * top domain does not contain any digits.
 */
  49 static const char* hostExpr = "^([-0-9A-Za-z]*[0-9A-Za-z]\\.)+[A-Za-z]+$";
  50 static regex_t hostRegex;
  51
  52 static const char* addrExpr =
  53     "^((25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\\.){1,3}"
  54     "(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])$";
  55 static regex_t addrRegex;
  56
  57 int init_string(void)
  58 {
  59   /*
  60    * initialize matching expressions
  61    * XXX - expressions MUST be correct, don't change expressions
  62    * without testing them. Might be a good idea to exit if these fail,
  63    * important code depends on them.
  64    * TODO: use regerror for an error message
  65    */
  66   if (regcomp(&hostRegex, hostExpr, REG_EXTENDED | REG_NOSUB))
  67     return 0;
  68
  69   if (regcomp(&addrRegex, addrExpr, REG_EXTENDED | REG_NOSUB))
  70     return 0;
  71   return 1;
  72 }
  73
  74 int string_is_hostname(const char* str)
  75 {
  76   assert(0 != str);
  77   return (strlen(str) <= HOSTLEN && 0 == regexec(&hostRegex, str, 0, 0, 0));
  78 }
  79
  80 int string_is_address(const char* str)
  81 {
  82   assert(0 != str);
  83   return (0 == regexec(&addrRegex, str, 0, 0, 0));
  84 }
  85
  86 int string_has_wildcards(const char* str)
  87 {
  88   assert(0 != str);
  89   for ( ; *str; ++str) {
  90     if ('\\' == *str) {
  91       if ('\0' == *++str)
  92         break;
  93     }
  94     else if ('*' == *str || '?' == *str)
  95       return 1;
  96   }
  97   return 0;
  98 }
  99
 100 unsigned int hash_pjw(const char* str)
 101 {
 102   unsigned h = 0;
 103   unsigned g;
 104   assert(str);
 105
 106   for ( ; *str; ++str) {
 107     h = (h << 4) + *str;
 108     if ((g = h & 0xf0000000)) {
 109       h ^= g >> 24;  /* fold top four bits onto ------X- */
 110       h ^= g;        /* clear top four bits */
 111     }
 112   }
 113   return h;
 114 }
 115
 116 /*
 117  * strtoken.c
 118  *
 119  * Walk through a string of tokens, using a set of separators.
 120  * -argv 9/90
 121  */
 122 char* ircd_strtok(char **save, char *str, char *fs)
 123 {
 124   char *pos = *save;            /* keep last position across calls */
 125   char *tmp;
 126
 127   if (str)
 128     pos = str;                  /* new string scan */
 129
 130   while (pos && *pos && strchr(fs, *pos) != NULL)
 131     pos++;                      /* skip leading separators */
 132
 133   if (!pos || !*pos)
 134     return (pos = *save = NULL);        /* string contains only sep's */
 135
 136   tmp = pos;                    /* now, keep position of the token */
 137
 138   while (*pos && strchr(fs, *pos) == NULL)
 139     pos++;                      /* skip content of the token */
 140
 141   if (*pos)
 142     *pos++ = '\0';              /* remove first sep after the token */
 143   else
 144     pos = NULL;                 /* end of string */
 145
 146   *save = pos;
 147   return (tmp);
 148 }
 149
/*
 * canonize
 *
 * Reduce a comma separated list to its unique entries, keeping the
 * first occurrence of each.  Entries are compared with ircd_strcmp().
 * Unavoidably O(n^2).
 *
 * buffer - [in] comma separated list; it is tokenized in place with
 *          ircd_strtok(), so its separators are overwritten with nuls
 * Returns a pointer to a static buffer holding the reduced list; the
 * contents are overwritten by the next call.
 *
 * NOTE(review): entries are appended with strcpy() and no length
 * check; this presumably relies on callers never passing more than
 * BUFSIZE bytes -- confirm.
 */
char* canonize(char* buffer)
{
  static char cbuf[BUFSIZE];    /* result storage, reused by every call */
  char*       s;                /* current token of the input list */
  char*       t;                /* current token of cbuf while searching */
  char*       cp = cbuf;        /* where the next unique entry is written */
  int         l = 0;            /* nonzero once cbuf holds an entry */
  char*       p = NULL;         /* ircd_strtok() state for the input list */
  char*       p2;               /* ircd_strtok() state for cbuf */

  *cp = '\0';

  for (s = ircd_strtok(&p, buffer, ","); s; s = ircd_strtok(&p, NULL, ","))
  {
    if (l)
    {
      /*
       * Look for s among the entries already stored.  ircd_strtok()
       * cuts cbuf apart as it goes, so each separator it replaced
       * with a nul is put back once its token has been compared.
       */
      p2 = NULL;
      for (t = ircd_strtok(&p2, cbuf, ","); t; t = ircd_strtok(&p2, NULL, ","))
        if (0 == ircd_strcmp(s, t))
          break;
        else if (p2)
          p2[-1] = ',';
    }
    else
      t = NULL;                 /* nothing stored yet, s cannot be a duplicate */
    if (!t)
    {
      /* s is new: append it */
      if (l)
        *(cp - 1) = ',';        /* previous entry's nul becomes the separator */
      else
        l = 1;
      strcpy(cp, s);
      /*
       * p points one past the nul ircd_strtok() wrote after s, so this
       * steps over the copied entry and its terminator.  p is NULL for
       * the last token, after which nothing more is appended.
       */
      if (p)
        cp += (p - s);
    }
    else if (p2)
      p2[-1] = ',';             /* duplicate found: restore the last separator cut */
  }
  return cbuf;
}
 196
 197 /*
 198  * ircd_strncpy - optimized strncpy
 199  * This may not look like it would be the fastest possible way to do it,
 200  * but it generally outperforms everything else on many platforms,
 201  * including asm library versions and memcpy, if compiled with the
 202  * optimizer on. (-O2 for gcc) --Bleep
 203  */
 204 char* ircd_strncpy(char* s1, const char* s2, size_t n)
 205 {
 206   char* endp = s1 + n;
 207   char* s = s1;
 208
 209   assert(0 != s1);
 210   assert(0 != s2);
 211
 212   while (s < endp && (*s++ = *s2++))
 213     ;
 214   return s1;
 215 }
 216
 217
/*
 * Unless FORCEINLINE is defined, the definitions of strChattr and
 * strCasediff are emitted here.  Each NTL_HDR_* macro appears to
 * expand to the function header and the matching NTL_SRC_* macro to
 * its body; neither is defined in this file (presumably they come
 * from ircd_chattr.h -- TODO confirm).
 */
#ifndef FORCEINLINE
NTL_HDR_strChattr { NTL_SRC_strChattr }
NTL_HDR_strCasediff { NTL_SRC_strCasediff }
#endif /* !FORCEINLINE */
 222
 223 /*
 224  * Other functions visible externally
 225  */
 226
 227 int strnChattr(const char *s, size_t n)
 228 {
 229   const char *rs = s;
 230   unsigned int x = ~0;
 231   int r = n;
 232   while (*rs && r--)
 233     x &= IRCD_CharAttrTab[*rs++ - CHAR_MIN];
 234   return x;
 235 }
 236
 237 /*
 238  * ircd_strcmp - case insensitive comparison of 2 strings
 239  * NOTE: see ircd_chattr.h for notes on case mapping.
 240  */
 241 int ircd_strcmp(const char *a, const char *b)
 242 {
 243   const char* ra = a;
 244   const char* rb = b;
 245   while (ToLower(*ra) == ToLower(*rb)) {
 246     if (!*ra++)
 247       return 0;
 248     else
 249       ++rb;
 250   }
 251   return (*ra - *rb);
 252 }
 253
 254 /*
 255  * ircd_strncmp - counted case insensitive comparison of 2 strings
 256  * NOTE: see ircd_chattr.h for notes on case mapping.
 257  */
 258 int ircd_strncmp(const char *a, const char *b, size_t n)
 259 {
 260   const char* ra = a;
 261   const char* rb = b;
 262   int left = n;
 263   if (!left--)
 264     return 0;
 265   while (ToLower(*ra) == ToLower(*rb)) {
 266     if (!*ra++ || !left--)
 267       return 0;
 268     else
 269       ++rb;
 270   }
 271   return (*ra - *rb);
 272 }
 273
 274 /*
 275  * unique_name_vector - create a unique vector of names from
 276  * a token separated list
 277  * list   - [in]  a token delimited null terminated character array
 278  * token  - [in]  the token to replace
 279  * vector - [out] vector of strings to be returned
 280  * size   - [in]  maximum number of elements to place in vector
 281  * Returns count of elements placed into the vector, if the list
 282  * is an empty string { '\0' } 0 is returned.
 283  * list, and vector must be non-null and size must be > 0
 284  * Empty strings <token><token> are not placed in the vector or counted.
 285  * This function ignores all subsequent tokens when count == size
 286  *
 287  * NOTE: this function destroys it's input, do not use list after it
 288  * is passed to this function
 289  */
 290 int unique_name_vector(char* list, char token, char** vector, int size)
 291 {
 292   int   i;
 293   int   count = 0;
 294   char* start = list;
 295   char* end;
 296
 297   assert(0 != list);
 298   assert(0 != vector);
 299   assert(0 < size);
 300
 301   /*
 302    * ignore spurious tokens
 303    */
 304   while (token == *start)
 305     ++start;
 306
 307   for (end = strchr(start, token); end; end = strchr(start, token)) {
 308     *end++ = '\0';
 309     /*
 310      * ignore spurious tokens
 311      */
 312     while (token == *end)
 313       ++end;
 314     for (i = 0; i < count; ++i) {
 315       if (0 == ircd_strcmp(vector[i], start))
 316         break;
 317     }
 318     if (i == count) {
 319       vector[count++] = start;
 320       if (count == size)
 321         return count;
 322     }
 323     start = end;
 324   }
 325   if (*start) {
 326     for (i = 0; i < count; ++i)
 327       if (0 == ircd_strcmp(vector[i], start))
 328         return count;
 329     vector[count++] = start;
 330   }
 331   return count;
 332 }
 333
 334 /*
 335  * token_vector - create a vector of tokens from
 336  * a token separated list
 337  * list   - [in]  a token delimited null terminated character array
 338  * token  - [in]  the token to replace
 339  * vector - [out] vector of strings to be returned
 340  * size   - [in]  maximum number of elements to place in vector
 341  * returns count of elements placed into the vector, if the list
 342  * is an empty string { '\0' } 0 is returned.
 343  * list, and vector must be non-null and size must be > 1
 344  * Empty tokens are counted and placed in the list
 345  *
 346  * NOTE: this function destroys it's input, do not use list after it
 347  * is passed to this function
 348  */
 349 int token_vector(char* list, char token, char** vector, int size)
 350 {
 351   int   count = 0;
 352   char* start = list;
 353   char* end;
 354
 355   assert(0 != list);
 356   assert(0 != vector);
 357   assert(1 < size);
 358
 359   vector[count++] = start;
 360   for (end = strchr(start, token); end; end = strchr(start, token)) {
 361     *end++ = '\0';
 362     start = end;
 363     if (*start) {
 364       vector[count++] = start;
 365       if (count < size)
 366         continue;
 367     }
 368     break;
 369   }
 370   return count;
 371 }
 372
 373 /*
 374  * host_from_uh - get the host.domain part of a user@host.domain string
 375  * ripped from get_sockhost
 376  */
 377 char* host_from_uh(char* host, const char* userhost, size_t n)
 378 {
 379   const char* s;
 380
 381   assert(0 != host);
 382   assert(0 != userhost);
 383
 384   if ((s = strchr(userhost, '@')))
 385     ++s;
 386   else
 387     s = userhost;
 388   ircd_strncpy(host, s, n);
 389   host[n] = '\0';
 390   return host;
 391 }
 392
/*
 * this new faster inet_ntoa was ripped from:
 * From: Thomas Helvey <tomh@inxpress.net>
 *
 * IpQuadTab maps a byte value (0-255) to its decimal text, so that
 * ircd_ntoa_r() can format each byte of an IPv4 address with a table
 * lookup.
 */
static const char* IpQuadTab[] =
{
    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",
   "10",  "11",  "12",  "13",  "14",  "15",  "16",  "17",  "18",  "19",
   "20",  "21",  "22",  "23",  "24",  "25",  "26",  "27",  "28",  "29",
   "30",  "31",  "32",  "33",  "34",  "35",  "36",  "37",  "38",  "39",
   "40",  "41",  "42",  "43",  "44",  "45",  "46",  "47",  "48",  "49",
   "50",  "51",  "52",  "53",  "54",  "55",  "56",  "57",  "58",  "59",
   "60",  "61",  "62",  "63",  "64",  "65",  "66",  "67",  "68",  "69",
   "70",  "71",  "72",  "73",  "74",  "75",  "76",  "77",  "78",  "79",
   "80",  "81",  "82",  "83",  "84",  "85",  "86",  "87",  "88",  "89",
   "90",  "91",  "92",  "93",  "94",  "95",  "96",  "97",  "98",  "99",
  "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
  "110", "111", "112", "113", "114", "115", "116", "117", "118", "119",
  "120", "121", "122", "123", "124", "125", "126", "127", "128", "129",
  "130", "131", "132", "133", "134", "135", "136", "137", "138", "139",
  "140", "141", "142", "143", "144", "145", "146", "147", "148", "149",
  "150", "151", "152", "153", "154", "155", "156", "157", "158", "159",
  "160", "161", "162", "163", "164", "165", "166", "167", "168", "169",
  "170", "171", "172", "173", "174", "175", "176", "177", "178", "179",
  "180", "181", "182", "183", "184", "185", "186", "187", "188", "189",
  "190", "191", "192", "193", "194", "195", "196", "197", "198", "199",
  "200", "201", "202", "203", "204", "205", "206", "207", "208", "209",
  "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
  "220", "221", "222", "223", "224", "225", "226", "227", "228", "229",
  "230", "231", "232", "233", "234", "235", "236", "237", "238", "239",
  "240", "241", "242", "243", "244", "245", "246", "247", "248", "249",
  "250", "251", "252", "253", "254", "255"
};
 426
 427 /*
 428  * ircd_ntoa - rewrote and renamed yet again :) --Bleep
 429  * inetntoa - in_addr to string
 430  *      changed name to remove collision possibility and
 431  *      so behaviour is guaranteed to take a pointer arg.
 432  *      -avalon 23/11/92
 433  *  inet_ntoa --  returned the dotted notation of a given
 434  *      internet number
 435  *      argv 11/90).
 436  *  inet_ntoa --  its broken on some Ultrix/Dynix too. -avalon
 437  */
 438 const char* ircd_ntoa(const struct irc_in_addr* in)
 439 {
 440   static char buf[SOCKIPLEN];
 441   return ircd_ntoa_r(buf, in);
 442 }
 443
 444 /* This doesn't really belong here, but otherwise umkpasswd breaks. */
 445 int irc_in_addr_is_ipv4(const struct irc_in_addr *addr)
 446 {
 447   return addr->in6_16[0] == 0
 448     && addr->in6_16[1] == 0
 449     && addr->in6_16[2] == 0
 450     && addr->in6_16[3] == 0
 451     && addr->in6_16[4] == 0
 452     && (addr->in6_16[5] == 0 || addr->in6_16[5] == 0xffff)
 453     && addr->in6_16[6] != 0;
 454 }
 455
 456 /*
 457  * reentrant version of above
 458  */
 459 const char* ircd_ntoa_r(char* buf, const struct irc_in_addr* in)
 460 {
 461     assert(buf != NULL);
 462     assert(in != NULL);
 463
 464     if (irc_in_addr_is_ipv4(in)) {
 465       unsigned int pos, len;
 466       unsigned char *pch;
 467
 468       pch = (unsigned char*)&in->in6_16[6];
 469       len = strlen(IpQuadTab[*pch]);
 470       memcpy(buf, IpQuadTab[*pch++], len);
 471       pos = len;
 472       buf[pos++] = '.';
 473       len = strlen(IpQuadTab[*pch]);
 474       memcpy(buf+pos, IpQuadTab[*pch++], len);
 475       pos += len;
 476       buf[pos++] = '.';
 477       len = strlen(IpQuadTab[*pch]);
 478       memcpy(buf+pos, IpQuadTab[*pch++], len);
 479       pos += len;
 480       buf[pos++] = '.';
 481       len = strlen(IpQuadTab[*pch]);
 482       memcpy(buf+pos, IpQuadTab[*pch++], len);
 483       buf[pos + len] = '\0';
 484       return buf;
 485     } else {
 486       static const char hexdigits[] = "0123456789abcdef";
 487       unsigned int pos, part, max_start, max_zeros, curr_zeros, ii;
 488
 489       /* Find longest run of zeros. */
 490       for (max_start = ii = 1, max_zeros = curr_zeros = 0; ii < 8; ++ii) {
 491         if (!in->in6_16[ii])
 492           curr_zeros++;
 493         else if (curr_zeros > max_zeros) {
 494           max_start = ii - curr_zeros;
 495           max_zeros = curr_zeros;
 496           curr_zeros = 0;
 497         }
 498       }
 499       if (curr_zeros > max_zeros) {
 500         max_start = ii - curr_zeros;
 501         max_zeros = curr_zeros;
 502       }
 503
 504       /* Print out address. */
 505 #define APPEND(CH) do { buf[pos++] = (CH); } while (0)
 506       for (pos = ii = 0; (ii < 8); ++ii) {
 507         if ((max_zeros > 0) && (ii == max_start)) {
 508           APPEND(':');
 509           ii += max_zeros - 1;
 510           continue;
 511         }
 512         part = ntohs(in->in6_16[ii]);
 513         if (part >= 0x1000)
 514           APPEND(hexdigits[part >> 12]);
 515         if (part >= 0x100)
 516           APPEND(hexdigits[(part >> 8) & 15]);
 517         if (part >= 0x10)
 518           APPEND(hexdigits[(part >> 4) & 15]);
 519         APPEND(hexdigits[part & 15]);
 520         if (ii < 7)
 521           APPEND(':');
 522       }
 523       if (max_zeros + max_start == 8)
 524         APPEND(':');
 525 #undef APPEND
 526
 527       /* Nul terminate and return number of characters used. */
 528       buf[pos++] = '\0';
 529       return buf;
 530     }
 531 }
 532
 533 static unsigned int
 534 ircd_aton_ip4(const char *input, unsigned int *output)
 535 {
 536   unsigned int dots = 0, pos = 0, part = 0, ip = 0;
 537
 538   /* Intentionally no support for bizarre IPv4 formats (plain
 539    * integers, octal or hex components) -- only vanilla dotted
 540    * decimal quads.
 541    */
 542   if (input[0] == '.')
 543     return 0;
 544   while (1) {
 545     if (IsDigit(input[pos])) {
 546       part = part * 10 + input[pos++] - '0';
 547       if (part > 255)
 548         return 0;
 549       if ((dots == 3) && !IsDigit(input[pos])) {
 550         *output = htonl(ip | part);
 551         return pos;
 552       }
 553     } else if (input[pos] == '.') {
 554       if (input[++pos] == '.')
 555         return 0;
 556       ip |= part << (24 - 8 * dots++);
 557       part = 0;
 558     } else
 559       return 0;
 560   }
 561 }
 562
 563 /* ircd_aton - Parse a numeric IPv4 or IPv6 address into an irc_in_addr.
 564  * Returns number of characters used by address, or 0 if the address was
 565  * unparseable or malformed.
 566  */
 567 int
 568 ircd_aton(struct irc_in_addr *ip, const char *input)
 569 {
 570   char *colon;
 571   char *dot;
 572
 573   assert(ip);
 574   assert(input);
 575   memset(ip, 0, sizeof(*ip));
 576   colon = strchr(input, ':');
 577   dot = strchr(input, '.');
 578
 579   if (colon && (!dot || (dot > colon))) {
 580     unsigned int part = 0, pos = 0, ii = 0, colon = 8;
 581     const char *part_start = NULL;
 582
 583     /* Parse IPv6, possibly like ::127.0.0.1.
 584      * This is pretty straightforward; the only trick is borrowed
 585      * from Paul Vixie (BIND): when it sees a "::" continue as if
 586      * it were a single ":", but note where it happened, and fill
 587      * with zeros afterwards.
 588      */
 589     if (input[pos] == ':') {
 590       if ((input[pos+1] != ':') || (input[pos+2] == ':'))
 591         return 0;
 592       colon = 0;
 593       pos += 2;
 594     }
 595     while (ii < 8) {
 596       unsigned char chval;
 597
 598       switch (input[pos]) {
 599       case '0': case '1': case '2': case '3': case '4':
 600       case '5': case '6': case '7': case '8': case '9':
 601           chval = input[pos] - '0';
 602       use_chval:
 603         part = (part << 4) | chval;
 604         if (part > 0xffff)
 605           return 0;
 606         pos++;
 607         break;
 608       case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 609           chval = input[pos] - 'A' + 10;
 610           goto use_chval;
 611       case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 612           chval = input[pos] - 'a' + 10;
 613           goto use_chval;
 614       case ':':
 615         part_start = input + ++pos;
 616         if (input[pos] == '.')
 617           return 0;
 618         ip->in6_16[ii++] = htons(part);
 619         part = 0;
 620         if (input[pos] == ':') {
 621           if (colon < 8)
 622             return 0;
 623           colon = ii;
 624           pos++;
 625         }
 626         break;
 627       case '.': {
 628         uint32_t ip4;
 629         unsigned int len;
 630         len = ircd_aton_ip4(input + pos, &ip4);
 631         if (!len || (ii > 6))
 632           return 0;
 633         ip->in6_16[ii++] = htons(ntohl(ip4) >> 16);
 634         ip->in6_16[ii++] = htons(ntohl(ip4) & 65535);
 635         pos += len;
 636         break;
 637       }
 638       default: {
 639         unsigned int jj;
 640         if (colon >= 8)
 641           return 0;
 642         /* Shift stuff after "::" up and fill middle with zeros. */
 643         ip->in6_16[ii++] = htons(part);
 644         for (jj = 0; jj < ii - colon; jj++)
 645           ip->in6_16[7 - jj] = ip->in6_16[ii - jj - 1];
 646         for (jj = 0; jj < 8 - ii; jj++)
 647           ip->in6_16[colon + jj] = 0;
 648         return pos;
 649       }
 650       }
 651     }
 652     return pos;
 653   } else if (dot) {
 654     unsigned int addr;
 655     int len = ircd_aton_ip4(input, &addr);
 656     if (len) {
 657       ip->in6_16[6] = htons(ntohl(addr) >> 16);
 658       ip->in6_16[7] = htons(ntohl(addr) & 65535);
 659       return len;
 660     }
 661   }
 662   return 0; /* parse failed */
 663 }