ircd/ircd_string.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/ircd_string.c
   3  * Copyright (C) 1999 Thomas Helvey
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 1, or (at your option)
   8  * any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18  *
  19  * $Id$
  20  */
  21 #include "config.h"
  22
  23 #include "ircd_string.h"
  24 #include "ircd_defs.h"
  25 #include "ircd_chattr.h"
  26 #include "ircd_log.h"
  27 #include <assert.h>
  28 #include <string.h>
  29 #include <regex.h>
  30 /*
  31  * include the character attribute tables here
  32  */
  33 #include "chattr.tab.c"
  34
  35
  36 /*
  37  * Disallow a hostname label to contain anything but a [-a-zA-Z0-9].
  38  * It may not start or end on a '.'.
  39  * A label may not end on a '-', the maximum length of a label is
  40  * 63 characters.
  41  * On top of that (which seems to be the RFC) we demand that the
  42  * top domain does not contain any digits.
  43  */
  44 static const char* hostExpr = "^([-0-9A-Za-z]*[0-9A-Za-z]\\.)+[A-Za-z]+$";
  45 static regex_t hostRegex;
  46
  47 static const char* addrExpr =
  48     "^((25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\\.){1,3}(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])$";
  49 static regex_t addrRegex;
  50
  51 int init_string(void)
  52 {
  53   /*
  54    * initialize matching expressions
  55    * XXX - expressions MUST be correct, don't change expressions
  56    * without testing them. Might be a good idea to exit if these fail,
  57    * important code depends on them.
  58    * TODO: use regerror for an error message
  59    */
  60   if (regcomp(&hostRegex, hostExpr, REG_EXTENDED | REG_NOSUB))
  61     return 0;
  62
  63   if (regcomp(&addrRegex, addrExpr, REG_EXTENDED | REG_NOSUB))
  64     return 0;
  65   return 1;
  66 }
  67
  68 int string_is_hostname(const char* str)
  69 {
  70   assert(0 != str);
  71   return (strlen(str) <= HOSTLEN && 0 == regexec(&hostRegex, str, 0, 0, 0));
  72 }
  73
  74 int string_is_address(const char* str)
  75 {
  76   assert(0 != str);
  77   return (0 == regexec(&addrRegex, str, 0, 0, 0));
  78 }
  79
  80 int string_has_wildcards(const char* str)
  81 {
  82   assert(0 != str);
  83   for ( ; *str; ++str) {
  84     if ('\\' == *str) {
  85       if ('\0' == *++str)
  86         break;
  87     }
  88     else if ('*' == *str || '?' == *str)
  89       return 1;
  90   }
  91   return 0;
  92 }
  93
  94 /*
  95  * strtoken.c
  96  *
  97  * Walk through a string of tokens, using a set of separators.
  98  * -argv 9/90
  99  */
 100 char* ircd_strtok(char **save, char *str, char *fs)
 101 {
 102   char *pos = *save;            /* keep last position across calls */
 103   char *tmp;
 104
 105   if (str)
 106     pos = str;                  /* new string scan */
 107
 108   while (pos && *pos && strchr(fs, *pos) != NULL)
 109     pos++;                      /* skip leading separators */
 110
 111   if (!pos || !*pos)
 112     return (pos = *save = NULL);        /* string contains only sep's */
 113
 114   tmp = pos;                    /* now, keep position of the token */
 115
 116   while (*pos && strchr(fs, *pos) == NULL)
 117     pos++;                      /* skip content of the token */
 118
 119   if (*pos)
 120     *pos++ = '\0';              /* remove first sep after the token */
 121   else
 122     pos = NULL;                 /* end of string */
 123
 124   *save = pos;
 125   return (tmp);
 126 }
 127
 128 /*
 129  * canonize
 130  *
 131  * reduce a string of duplicate list entries to contain only the unique
 132  * items.  Unavoidably O(n^2).
 133  */
 134 char* canonize(char* buffer)
 135 {
 136   static char cbuf[BUFSIZE];
 137   char*       s;
 138   char*       t;
 139   char*       cp = cbuf;
 140   int         l = 0;
 141   char*       p = NULL;
 142   char*       p2;
 143
 144   *cp = '\0';
 145
 146   for (s = ircd_strtok(&p, buffer, ","); s; s = ircd_strtok(&p, NULL, ","))
 147   {
 148     if (l)
 149     {
 150       p2 = NULL;
 151       for (t = ircd_strtok(&p2, cbuf, ","); t; t = ircd_strtok(&p2, NULL, ","))
 152         if (0 == ircd_strcmp(s, t))
 153           break;
 154         else if (p2)
 155           p2[-1] = ',';
 156     }
 157     else
 158       t = NULL;
 159     if (!t)
 160     {
 161       if (l)
 162         *(cp - 1) = ',';
 163       else
 164         l = 1;
 165       strcpy(cp, s);
 166       if (p)
 167         cp += (p - s);
 168     }
 169     else if (p2)
 170       p2[-1] = ',';
 171   }
 172   return cbuf;
 173 }
 174
 175 /*
 176  * ircd_strncpy - optimized strncpy
 177  * This may not look like it would be the fastest possible way to do it,
 178  * but it generally outperforms everything else on many platforms,
 179  * including asm library versions and memcpy, if compiled with the
 180  * optimizer on. (-O2 for gcc) --Bleep
 181  */
 182 char* ircd_strncpy(char* s1, const char* s2, size_t n)
 183 {
 184   char* endp = s1 + n;
 185   char* s = s1;
 186
 187   assert(0 != s1);
 188   assert(0 != s2);
 189
 190   while (s < endp && (*s++ = *s2++))
 191     ;
 192   return s1;
 193 }
 194
 195
  /*
   * When FORCEINLINE is not defined, emit the strChattr and strCasediff
   * definitions in this file by pairing each NTL_HDR_* macro with a
   * brace-enclosed NTL_SRC_* macro.
   * NOTE(review): the NTL_* macros are not visible here; presumably the
   * headers included above supply the signature and body - confirm.
   */
  196 #ifndef FORCEINLINE
  197 NTL_HDR_strChattr { NTL_SRC_strChattr }
  198 NTL_HDR_strCasediff { NTL_SRC_strCasediff }
  199 #endif /* !FORCEINLINE */
 200
 201 /*
 202  * Other functions visible externally
 203  */
 204
 205 int strnChattr(const char *s, size_t n)
 206 {
 207   const char *rs = s;
 208   unsigned int x = ~0;
 209   int r = n;
 210   while (*rs && r--)
 211     x &= IRCD_CharAttrTab[*rs++ - CHAR_MIN];
 212   return x;
 213 }
 214
 215 /*
 216  * ircd_strcmp - case insensitive comparison of 2 strings
 217  * NOTE: see ircd_chattr.h for notes on case mapping.
 218  */
 219 int ircd_strcmp(const char *a, const char *b)
 220 {
 221   const char* ra = a;
 222   const char* rb = b;
 223   while (ToLower(*ra) == ToLower(*rb)) {
 224     if (!*ra++)
 225       return 0;
 226     else
 227       ++rb;
 228   }
 229   return (*ra - *rb);
 230 }
 231
 232 /*
 233  * ircd_strncmp - counted case insensitive comparison of 2 strings
 234  * NOTE: see ircd_chattr.h for notes on case mapping.
 235  */
 236 int ircd_strncmp(const char *a, const char *b, size_t n)
 237 {
 238   const char* ra = a;
 239   const char* rb = b;
 240   int left = n;
 241   if (!left--)
 242     return 0;
 243   while (ToLower(*ra) == ToLower(*rb)) {
 244     if (!*ra++ || !left--)
 245       return 0;
 246     else
 247       ++rb;
 248   }
 249   return (*ra - *rb);
 250 }
 251
 252 /*
 253  * unique_name_vector - create a unique vector of names from
 254  * a token separated list
 255  * list   - [in]  a token delimited null terminated character array
 256  * token  - [in]  the token to replace
 257  * vector - [out] vector of strings to be returned
 258  * size   - [in]  maximum number of elements to place in vector
 259  * Returns count of elements placed into the vector, if the list
 260  * is an empty string { '\0' } 0 is returned.
 261  * list, and vector must be non-null and size must be > 0
 262  * Empty strings <token><token> are not placed in the vector or counted.
 263  * This function ignores all subsequent tokens when count == size
 264  *
 265  * NOTE: this function destroys it's input, do not use list after it
 266  * is passed to this function
 267  */
 268 int unique_name_vector(char* list, char token, char** vector, int size)
 269 {
 270   int   i;
 271   int   count = 0;
 272   char* start = list;
 273   char* end;
 274
 275   assert(0 != list);
 276   assert(0 != vector);
 277   assert(0 < size);
 278
 279   /*
 280    * ignore spurious tokens
 281    */
 282   while (token == *start)
 283     ++start;
 284
 285   for (end = strchr(start, token); end; end = strchr(start, token)) {
 286     *end++ = '\0';
 287     /*
 288      * ignore spurious tokens
 289      */
 290     while (token == *end)
 291       ++end;
 292     for (i = 0; i < count; ++i) {
 293       if (0 == ircd_strcmp(vector[i], start))
 294         break;
 295     }
 296     if (i == count) {
 297       vector[count++] = start;
 298       if (count == size)
 299         return count;
 300     }
 301     start = end;
 302   }
 303   if (*start) {
 304     for (i = 0; i < count; ++i) {
 305       if (0 == ircd_strcmp(vector[i], start))
 306         return count;
 307       vector[count++] = start;
 308     }
 309   }
 310   return count;
 311 }
 312
 313 /*
 314  * token_vector - create a vector of tokens from
 315  * a token separated list
 316  * list   - [in]  a token delimited null terminated character array
 317  * token  - [in]  the token to replace
 318  * vector - [out] vector of strings to be returned
 319  * size   - [in]  maximum number of elements to place in vector
 320  * returns count of elements placed into the vector, if the list
 321  * is an empty string { '\0' } 0 is returned.
 322  * list, and vector must be non-null and size must be > 1
 323  * Empty tokens are counted and placed in the list
 324  *
 325  * NOTE: this function destroys it's input, do not use list after it
 326  * is passed to this function
 327  */
 328 int token_vector(char* list, char token, char** vector, int size)
 329 {
 330   int   count = 0;
 331   char* start = list;
 332   char* end;
 333
 334   assert(0 != list);
 335   assert(0 != vector);
 336   assert(1 < size);
 337
 338   vector[count++] = start;
 339   for (end = strchr(start, token); end; end = strchr(start, token)) {
 340     *end++ = '\0';
 341     start = end;
 342     if (*start) {
 343       vector[count++] = start;
 344       if (count < size)
 345         continue;
 346     }
 347     break;
 348   }
 349   return count;
 350 }
 351
 352 /*
 353  * host_from_uh - get the host.domain part of a user@host.domain string
 354  * ripped from get_sockhost
 355  */
 356 char* host_from_uh(char* host, const char* userhost, size_t n)
 357 {
 358   const char* s;
 359
 360   assert(0 != host);
 361   assert(0 != userhost);
 362
 363   if ((s = strchr(userhost, '@')))
 364     ++s;
 365   else
 366     s = userhost;
 367   ircd_strncpy(host, s, n);
 368   host[n] = '\0';
 369   return host;
 370 }
 371
 372 /*
 373  * this new faster inet_ntoa was ripped from:
 374  * From: Thomas Helvey <tomh@inxpress.net>
 375  */
 376 static const char* IpQuadTab[] =
 377 {
 378     "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",
 379    "10",  "11",  "12",  "13",  "14",  "15",  "16",  "17",  "18",  "19",
 380    "20",  "21",  "22",  "23",  "24",  "25",  "26",  "27",  "28",  "29",
 381    "30",  "31",  "32",  "33",  "34",  "35",  "36",  "37",  "38",  "39",
 382    "40",  "41",  "42",  "43",  "44",  "45",  "46",  "47",  "48",  "49",
 383    "50",  "51",  "52",  "53",  "54",  "55",  "56",  "57",  "58",  "59",
 384    "60",  "61",  "62",  "63",  "64",  "65",  "66",  "67",  "68",  "69",
 385    "70",  "71",  "72",  "73",  "74",  "75",  "76",  "77",  "78",  "79",
 386    "80",  "81",  "82",  "83",  "84",  "85",  "86",  "87",  "88",  "89",
 387    "90",  "91",  "92",  "93",  "94",  "95",  "96",  "97",  "98",  "99",
 388   "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
 389   "110", "111", "112", "113", "114", "115", "116", "117", "118", "119",
 390   "120", "121", "122", "123", "124", "125", "126", "127", "128", "129",
 391   "130", "131", "132", "133", "134", "135", "136", "137", "138", "139",
 392   "140", "141", "142", "143", "144", "145", "146", "147", "148", "149",
 393   "150", "151", "152", "153", "154", "155", "156", "157", "158", "159",
 394   "160", "161", "162", "163", "164", "165", "166", "167", "168", "169",
 395   "170", "171", "172", "173", "174", "175", "176", "177", "178", "179",
 396   "180", "181", "182", "183", "184", "185", "186", "187", "188", "189",
 397   "190", "191", "192", "193", "194", "195", "196", "197", "198", "199",
 398   "200", "201", "202", "203", "204", "205", "206", "207", "208", "209",
 399   "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
 400   "220", "221", "222", "223", "224", "225", "226", "227", "228", "229",
 401   "230", "231", "232", "233", "234", "235", "236", "237", "238", "239",
 402   "240", "241", "242", "243", "244", "245", "246", "247", "248", "249",
 403   "250", "251", "252", "253", "254", "255"
 404 };
 405
 406 /*
 407  * ircd_ntoa - rewrote and renamed yet again :) --Bleep
 408  * inetntoa - in_addr to string
 409  *      changed name to remove collision possibility and
 410  *      so behaviour is guaranteed to take a pointer arg.
 411  *      -avalon 23/11/92
 412  *  inet_ntoa --  returned the dotted notation of a given
 413  *      internet number
 414  *      argv 11/90).
 415  *  inet_ntoa --  its broken on some Ultrix/Dynix too. -avalon
 416  */
 417 const char* ircd_ntoa(const char* in)
 418 {
 419   static char buf[20];
 420   return ircd_ntoa_r(buf, in);
 421 }
 422
 423 /*
 424  * reentrant version of above
 425  */
 426 const char* ircd_ntoa_r(char* buf, const char* in)
 427 {
 428   char*                p = buf;
 429   const unsigned char* a = (const unsigned char*)in;
 430   const char*          n;
 431
 432   assert(0 != buf);
 433   assert(0 != in);
 434
 435   n = IpQuadTab[*a++];
 436   while ((*p = *n++))
 437     ++p;
 438   *p++ = '.';
 439   n = IpQuadTab[*a++];
 440   while ((*p = *n++))
 441     ++p;
 442   *p++ = '.';
 443   n = IpQuadTab[*a++];
 444   while ((*p = *n++))
 445     ++p;
 446   *p++ = '.';
 447   n = IpQuadTab[*a];
 448   while ((*p = *n++))
 449     ++p;
 450   return buf;
 451 }
 452
 453