ircd/ircd_string.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/ircd_string.c
   3  * Copyright (C) 1999 Thomas Helvey
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 1, or (at your option)
   8  * any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18  *
  19  * $Id$
  20  */
  21 #include "config.h"
  22
  23 #include "ircd_string.h"
  24 #include "ircd_defs.h"
  25 #include "ircd_chattr.h"
  26 #include "ircd_log.h"
  27 #include <assert.h>
  28 #include <string.h>
  29 #include <regex.h>
  30 /*
  31  * include the character attribute tables here
  32  */
  33 #include "chattr.tab.c"
  34
  35
  36 /*
  37  * Disallow a hostname label to contain anything but a [-a-zA-Z0-9].
  38  * It may not start or end on a '.'.
  39  * A label may not end on a '-', the maximum length of a label is
  40  * 63 characters.
  41  * On top of that (which seems to be the RFC) we demand that the
  42  * top domain does not contain any digits.
  43  */
  44 static const char* hostExpr = "^([-0-9A-Za-z]*[0-9A-Za-z]\\.)+[A-Za-z]+$";
  45 static regex_t hostRegex;
  46
  47 static const char* addrExpr =
  48     "^((25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\\.){1,3}(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])$";
  49 static regex_t addrRegex;
  50
  51 int init_string(void)
  52 {
  53   /*
  54    * initialize matching expressions
  55    * XXX - expressions MUST be correct, don't change expressions
  56    * without testing them. Might be a good idea to exit if these fail,
  57    * important code depends on them.
  58    * TODO: use regerror for an error message
  59    */
  60   if (regcomp(&hostRegex, hostExpr, REG_EXTENDED | REG_NOSUB))
  61     return 0;
  62
  63   if (regcomp(&addrRegex, addrExpr, REG_EXTENDED | REG_NOSUB))
  64     return 0;
  65   return 1;
  66 }
  67
  68 int string_is_hostname(const char* str)
  69 {
  70   assert(0 != str);
  71   return (strlen(str) <= HOSTLEN && 0 == regexec(&hostRegex, str, 0, 0, 0));
  72 }
  73
  74 int string_is_address(const char* str)
  75 {
  76   assert(0 != str);
  77   return (0 == regexec(&addrRegex, str, 0, 0, 0));
  78 }
  79
  80 int string_has_wildcards(const char* str)
  81 {
  82   assert(0 != str);
  83   for ( ; *str; ++str) {
  84     if ('\\' == *str) {
  85       if ('\0' == *++str)
  86         break;
  87     }
  88     else if ('*' == *str || '?' == *str)
  89       return 1;
  90   }
  91   return 0;
  92 }
  93
  94 /*
  95  * strtoken.c
  96  *
  97  * Walk through a string of tokens, using a set of separators.
  98  * -argv 9/90
  99  */
 100 char* ircd_strtok(char **save, char *str, char *fs)
 101 {
 102   char *pos = *save;            /* keep last position across calls */
 103   char *tmp;
 104
 105   if (str)
 106     pos = str;                  /* new string scan */
 107
 108   while (pos && *pos && strchr(fs, *pos) != NULL)
 109     pos++;                      /* skip leading separators */
 110
 111   if (!pos || !*pos)
 112     return (pos = *save = NULL);        /* string contains only sep's */
 113
 114   tmp = pos;                    /* now, keep position of the token */
 115
 116   while (*pos && strchr(fs, *pos) == NULL)
 117     pos++;                      /* skip content of the token */
 118
 119   if (*pos)
 120     *pos++ = '\0';              /* remove first sep after the token */
 121   else
 122     pos = NULL;                 /* end of string */
 123
 124   *save = pos;
 125   return (tmp);
 126 }
 127
 128 /*
 129  * canonize
 130  *
 131  * reduce a string of duplicate list entries to contain only the unique
 132  * items.  Unavoidably O(n^2).
 133  */
 134 char* canonize(char* buffer)
 135 {
 136   static char cbuf[BUFSIZE];
 137   char*       s;
 138   char*       t;
 139   char*       cp = cbuf;
 140   int         l = 0;
 141   char*       p = NULL;
 142   char*       p2;
 143
 144   *cp = '\0';
 145
 146   for (s = ircd_strtok(&p, buffer, ","); s; s = ircd_strtok(&p, NULL, ","))
 147   {
 148     if (l)
 149     {
 150       p2 = NULL;
 151       for (t = ircd_strtok(&p2, cbuf, ","); t; t = ircd_strtok(&p2, NULL, ","))
 152         if (0 == ircd_strcmp(s, t))
 153           break;
 154         else if (p2)
 155           p2[-1] = ',';
 156     }
 157     else
 158       t = NULL;
 159     if (!t)
 160     {
 161       if (l)
 162         *(cp - 1) = ',';
 163       else
 164         l = 1;
 165       strcpy(cp, s);
 166       if (p)
 167         cp += (p - s);
 168     }
 169     else if (p2)
 170       p2[-1] = ',';
 171   }
 172   return cbuf;
 173 }
 174
 175 /*
 176  * ircd_strncpy - optimized strncpy
 177  * This may not look like it would be the fastest possible way to do it,
 178  * but it generally outperforms everything else on many platforms,
 179  * including asm library versions and memcpy, if compiled with the
 180  * optimizer on. (-O2 for gcc) --Bleep
 181  */
 182 char* ircd_strncpy(char* s1, const char* s2, size_t n)
 183 {
 184   char* endp = s1 + n;
 185   char* s = s1;
 186
 187   assert(0 != s1);
 188   assert(0 != s2);
 189
 190   while (s < endp && (*s++ = *s2++))
 191     ;
 192   return s1;
 193 }
 194
 195
 196 #ifndef FORCEINLINE
 197 NTL_HDR_strChattr { NTL_SRC_strChattr }
 198 NTL_HDR_strCasediff { NTL_SRC_strCasediff }
 199 #endif /* !FORCEINLINE */
 200
 201 /*
 202  * Other functions visible externally
 203  */
 204
 205 int strnChattr(const char *s, size_t n)
 206 {
 207   const char *rs = s;
 208   unsigned int x = ~0;
 209   int r = n;
 210   while (*rs && r--)
 211     x &= IRCD_CharAttrTab[*rs++ - CHAR_MIN];
 212   return x;
 213 }
 214
 215 /*
 216  * ircd_strcmp - case insensitive comparison of 2 strings
 217  * NOTE: see ircd_chattr.h for notes on case mapping.
 218  */
 219 int ircd_strcmp(const char *a, const char *b)
 220 {
 221   const char* ra = a;
 222   const char* rb = b;
 223   while (ToLower(*ra) == ToLower(*rb)) {
 224     if (!*ra++)
 225       return 0;
 226     else
 227       ++rb;
 228   }
 229   return (*ra - *rb);
 230 }
 231
 232 /*
 233  * ircd_strncmp - counted case insensitive comparison of 2 strings
 234  * NOTE: see ircd_chattr.h for notes on case mapping.
 235  */
 236 int ircd_strncmp(const char *a, const char *b, size_t n)
 237 {
 238   const char* ra = a;
 239   const char* rb = b;
 240   int left = n;
 241   if (!left--)
 242     return 0;
 243   while (ToLower(*ra) == ToLower(*rb)) {
 244     if (!*ra++ || !left--)
 245       return 0;
 246     else
 247       ++rb;
 248   }
 249   return (*ra - *rb);
 250 }
 251
 252 /*
 253  * unique_name_vector - create a unique vector of names from
 254  * a token separated list
 255  * list   - [in]  a token delimited null terminated character array
 256  * token  - [in]  the token to replace
 257  * vector - [out] vector of strings to be returned
 258  * size   - [in]  maximum number of elements to place in vector
 259  * Returns count of elements placed into the vector, if the list
 260  * is an empty string { '\0' } 0 is returned.
 261  * list, and vector must be non-null and size must be > 0
 262  * Empty strings <token><token> are not placed in the vector or counted.
 263  * This function ignores all subsequent tokens when count == size
 264  *
 265  * NOTE: this function destroys it's input, do not use list after it
 266  * is passed to this function
 267  */
 268 int unique_name_vector(char* list, char token, char** vector, int size)
 269 {
 270   int   i;
 271   int   count = 0;
 272   char* start = list;
 273   char* end;
 274
 275   assert(0 != list);
 276   assert(0 != vector);
 277   assert(0 < size);
 278
 279   /*
 280    * ignore spurious tokens
 281    */
 282   while (token == *start)
 283     ++start;
 284
 285   for (end = strchr(start, token); end; end = strchr(start, token)) {
 286     *end++ = '\0';
 287     /*
 288      * ignore spurious tokens
 289      */
 290     while (token == *end)
 291       ++end;
 292     for (i = 0; i < count; ++i) {
 293       if (0 == ircd_strcmp(vector[i], start))
 294         break;
 295     }
 296     if (i == count) {
 297       vector[count++] = start;
 298       if (count == size)
 299         return count;
 300     }
 301     start = end;
 302   }
 303   if (*start) {
 304     for (i = 0; i < count; ++i)
 305       if (0 == ircd_strcmp(vector[i], start))
 306         return count;
 307     vector[count++] = start;
 308   }
 309   return count;
 310 }
 311
 312 /*
 313  * token_vector - create a vector of tokens from
 314  * a token separated list
 315  * list   - [in]  a token delimited null terminated character array
 316  * token  - [in]  the token to replace
 317  * vector - [out] vector of strings to be returned
 318  * size   - [in]  maximum number of elements to place in vector
 319  * returns count of elements placed into the vector, if the list
 320  * is an empty string { '\0' } 0 is returned.
 321  * list, and vector must be non-null and size must be > 1
 322  * Empty tokens are counted and placed in the list
 323  *
 324  * NOTE: this function destroys it's input, do not use list after it
 325  * is passed to this function
 326  */
 327 int token_vector(char* list, char token, char** vector, int size)
 328 {
 329   int   count = 0;
 330   char* start = list;
 331   char* end;
 332
 333   assert(0 != list);
 334   assert(0 != vector);
 335   assert(1 < size);
 336
 337   vector[count++] = start;
 338   for (end = strchr(start, token); end; end = strchr(start, token)) {
 339     *end++ = '\0';
 340     start = end;
 341     if (*start) {
 342       vector[count++] = start;
 343       if (count < size)
 344         continue;
 345     }
 346     break;
 347   }
 348   return count;
 349 }
 350
 351 /*
 352  * host_from_uh - get the host.domain part of a user@host.domain string
 353  * ripped from get_sockhost
 354  */
 355 char* host_from_uh(char* host, const char* userhost, size_t n)
 356 {
 357   const char* s;
 358
 359   assert(0 != host);
 360   assert(0 != userhost);
 361
 362   if ((s = strchr(userhost, '@')))
 363     ++s;
 364   else
 365     s = userhost;
 366   ircd_strncpy(host, s, n);
 367   host[n] = '\0';
 368   return host;
 369 }
 370
 371 /*
 372  * this new faster inet_ntoa was ripped from:
 373  * From: Thomas Helvey <tomh@inxpress.net>
 374  */
 375 static const char* IpQuadTab[] =
 376 {
 377     "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",
 378    "10",  "11",  "12",  "13",  "14",  "15",  "16",  "17",  "18",  "19",
 379    "20",  "21",  "22",  "23",  "24",  "25",  "26",  "27",  "28",  "29",
 380    "30",  "31",  "32",  "33",  "34",  "35",  "36",  "37",  "38",  "39",
 381    "40",  "41",  "42",  "43",  "44",  "45",  "46",  "47",  "48",  "49",
 382    "50",  "51",  "52",  "53",  "54",  "55",  "56",  "57",  "58",  "59",
 383    "60",  "61",  "62",  "63",  "64",  "65",  "66",  "67",  "68",  "69",
 384    "70",  "71",  "72",  "73",  "74",  "75",  "76",  "77",  "78",  "79",
 385    "80",  "81",  "82",  "83",  "84",  "85",  "86",  "87",  "88",  "89",
 386    "90",  "91",  "92",  "93",  "94",  "95",  "96",  "97",  "98",  "99",
 387   "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
 388   "110", "111", "112", "113", "114", "115", "116", "117", "118", "119",
 389   "120", "121", "122", "123", "124", "125", "126", "127", "128", "129",
 390   "130", "131", "132", "133", "134", "135", "136", "137", "138", "139",
 391   "140", "141", "142", "143", "144", "145", "146", "147", "148", "149",
 392   "150", "151", "152", "153", "154", "155", "156", "157", "158", "159",
 393   "160", "161", "162", "163", "164", "165", "166", "167", "168", "169",
 394   "170", "171", "172", "173", "174", "175", "176", "177", "178", "179",
 395   "180", "181", "182", "183", "184", "185", "186", "187", "188", "189",
 396   "190", "191", "192", "193", "194", "195", "196", "197", "198", "199",
 397   "200", "201", "202", "203", "204", "205", "206", "207", "208", "209",
 398   "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
 399   "220", "221", "222", "223", "224", "225", "226", "227", "228", "229",
 400   "230", "231", "232", "233", "234", "235", "236", "237", "238", "239",
 401   "240", "241", "242", "243", "244", "245", "246", "247", "248", "249",
 402   "250", "251", "252", "253", "254", "255"
 403 };
 404
 405 /*
 406  * ircd_ntoa - rewrote and renamed yet again :) --Bleep
 407  * inetntoa - in_addr to string
 408  *      changed name to remove collision possibility and
 409  *      so behaviour is guaranteed to take a pointer arg.
 410  *      -avalon 23/11/92
 411  *  inet_ntoa --  returned the dotted notation of a given
 412  *      internet number
 413  *      argv 11/90).
 414  *  inet_ntoa --  its broken on some Ultrix/Dynix too. -avalon
 415  */
 416 const char* ircd_ntoa(const char* in)
 417 {
 418   static char buf[20];
 419   return ircd_ntoa_r(buf, in);
 420 }
 421
 422 /*
 423  * reentrant version of above
 424  */
 425 const char* ircd_ntoa_r(char* buf, const char* in)
 426 {
 427   char*                p = buf;
 428   const unsigned char* a = (const unsigned char*)in;
 429   const char*          n;
 430
 431   assert(0 != buf);
 432   assert(0 != in);
 433
 434   n = IpQuadTab[*a++];
 435   while ((*p = *n++))
 436     ++p;
 437   *p++ = '.';
 438   n = IpQuadTab[*a++];
 439   while ((*p = *n++))
 440     ++p;
 441   *p++ = '.';
 442   n = IpQuadTab[*a++];
 443   while ((*p = *n++))
 444     ++p;
 445   *p++ = '.';
 446   n = IpQuadTab[*a];
 447   while ((*p = *n++))
 448     ++p;
 449   return buf;
 450 }
 451
 452