ircd/ircd_string.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/ircd_string.c
   3  * Copyright (C) 1999 Thomas Helvey
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 1, or (at your option)
   8  * any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18  *
  19  * $Id$
  20  */
  21 #include "config.h"
  22
  23 #include "ircd_string.h"
  24 #include "ircd_defs.h"
  25 #include "ircd_chattr.h"
  26 #include "ircd_log.h"
  27 #include <assert.h>
  28 #include <string.h>
  29 #include <regex.h>
  30 /*
  31  * include the character attribute tables here
  32  */
  33 #include "chattr.tab.c"
  34
  35
  36 /*
  37  * Disallow a hostname label to contain anything but a [-a-zA-Z0-9].
  38  * It may not start or end on a '.'.
  39  * A label may not end on a '-', the maximum length of a label is
  40  * 63 characters.
  41  * On top of that (which seems to be the RFC) we demand that the
  42  * top domain does not contain any digits.
  43  */
  44 static const char* hostExpr = "^([-0-9A-Za-z]*[0-9A-Za-z]\\.)+[A-Za-z]+$";
  45 static regex_t hostRegex;
  46
  47 static const char* addrExpr =
  48     "^((25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\\.){1,3}"
  49     "(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])$";
  50 static regex_t addrRegex;
  51
  52 int init_string(void)
  53 {
  54   /*
  55    * initialize matching expressions
  56    * XXX - expressions MUST be correct, don't change expressions
  57    * without testing them. Might be a good idea to exit if these fail,
  58    * important code depends on them.
  59    * TODO: use regerror for an error message
  60    */
  61   if (regcomp(&hostRegex, hostExpr, REG_EXTENDED | REG_NOSUB))
  62     return 0;
  63
  64   if (regcomp(&addrRegex, addrExpr, REG_EXTENDED | REG_NOSUB))
  65     return 0;
  66   return 1;
  67 }
  68
  69 int string_is_hostname(const char* str)
  70 {
  71   assert(0 != str);
  72   return (strlen(str) <= HOSTLEN && 0 == regexec(&hostRegex, str, 0, 0, 0));
  73 }
  74
  75 int string_is_address(const char* str)
  76 {
  77   assert(0 != str);
  78   return (0 == regexec(&addrRegex, str, 0, 0, 0));
  79 }
  80
  81 int string_has_wildcards(const char* str)
  82 {
  83   assert(0 != str);
  84   for ( ; *str; ++str) {
  85     if ('\\' == *str) {
  86       if ('\0' == *++str)
  87         break;
  88     }
  89     else if ('*' == *str || '?' == *str)
  90       return 1;
  91   }
  92   return 0;
  93 }
  94
  95 unsigned int hash_pjw(const char* str)
  96 {
  97   unsigned h = 0;
  98   unsigned g;
  99   assert(str);
 100
 101   for ( ; *str; ++str) {
 102     h = (h << 4) + *str;
 103     if ((g = h & 0xf0000000)) {
 104       h ^= g >> 24;  /* fold top four bits onto ------X- */
 105       h ^= g;        /* clear top four bits */
 106     }
 107   }
 108   return h;
 109 }
 110
 111 /*
 112  * strtoken.c
 113  *
 114  * Walk through a string of tokens, using a set of separators.
 115  * -argv 9/90
 116  */
 117 char* ircd_strtok(char **save, char *str, char *fs)
 118 {
 119   char *pos = *save;            /* keep last position across calls */
 120   char *tmp;
 121
 122   if (str)
 123     pos = str;                  /* new string scan */
 124
 125   while (pos && *pos && strchr(fs, *pos) != NULL)
 126     pos++;                      /* skip leading separators */
 127
 128   if (!pos || !*pos)
 129     return (pos = *save = NULL);        /* string contains only sep's */
 130
 131   tmp = pos;                    /* now, keep position of the token */
 132
 133   while (*pos && strchr(fs, *pos) == NULL)
 134     pos++;                      /* skip content of the token */
 135
 136   if (*pos)
 137     *pos++ = '\0';              /* remove first sep after the token */
 138   else
 139     pos = NULL;                 /* end of string */
 140
 141   *save = pos;
 142   return (tmp);
 143 }
 144
 145 /*
 146  * canonize
 147  *
 148  * reduce a string of duplicate list entries to contain only the unique
 149  * items.  Unavoidably O(n^2).
 150  */
 151 char* canonize(char* buffer)
 152 {
 153   static char cbuf[BUFSIZE];
 154   char*       s;
 155   char*       t;
 156   char*       cp = cbuf;
 157   int         l = 0;
 158   char*       p = NULL;
 159   char*       p2;
 160
 161   *cp = '\0';
 162
 163   for (s = ircd_strtok(&p, buffer, ","); s; s = ircd_strtok(&p, NULL, ","))
 164   {
 165     if (l)
 166     {
 167       p2 = NULL;
 168       for (t = ircd_strtok(&p2, cbuf, ","); t; t = ircd_strtok(&p2, NULL, ","))
 169         if (0 == ircd_strcmp(s, t))
 170           break;
 171         else if (p2)
 172           p2[-1] = ',';
 173     }
 174     else
 175       t = NULL;
 176     if (!t)
 177     {
 178       if (l)
 179         *(cp - 1) = ',';
 180       else
 181         l = 1;
 182       strcpy(cp, s);
 183       if (p)
 184         cp += (p - s);
 185     }
 186     else if (p2)
 187       p2[-1] = ',';
 188   }
 189   return cbuf;
 190 }
 191
 192 /*
 193  * ircd_strncpy - optimized strncpy
 194  * This may not look like it would be the fastest possible way to do it,
 195  * but it generally outperforms everything else on many platforms,
 196  * including asm library versions and memcpy, if compiled with the
 197  * optimizer on. (-O2 for gcc) --Bleep
 198  */
 199 char* ircd_strncpy(char* s1, const char* s2, size_t n)
 200 {
 201   char* endp = s1 + n;
 202   char* s = s1;
 203
 204   assert(0 != s1);
 205   assert(0 != s2);
 206
 207   while (s < endp && (*s++ = *s2++))
 208     ;
 209   return s1;
 210 }
 211
 212
 213 #ifndef FORCEINLINE
 214 NTL_HDR_strChattr { NTL_SRC_strChattr }
 215 NTL_HDR_strCasediff { NTL_SRC_strCasediff }
 216 #endif /* !FORCEINLINE */
 217
 218 /*
 219  * Other functions visible externally
 220  */
 221
 222 int strnChattr(const char *s, size_t n)
 223 {
 224   const char *rs = s;
 225   unsigned int x = ~0;
 226   int r = n;
 227   while (*rs && r--)
 228     x &= IRCD_CharAttrTab[*rs++ - CHAR_MIN];
 229   return x;
 230 }
 231
 232 /*
 233  * ircd_strcmp - case insensitive comparison of 2 strings
 234  * NOTE: see ircd_chattr.h for notes on case mapping.
 235  */
 236 int ircd_strcmp(const char *a, const char *b)
 237 {
 238   const char* ra = a;
 239   const char* rb = b;
 240   while (ToLower(*ra) == ToLower(*rb)) {
 241     if (!*ra++)
 242       return 0;
 243     else
 244       ++rb;
 245   }
 246   return (*ra - *rb);
 247 }
 248
 249 /*
 250  * ircd_strncmp - counted case insensitive comparison of 2 strings
 251  * NOTE: see ircd_chattr.h for notes on case mapping.
 252  */
 253 int ircd_strncmp(const char *a, const char *b, size_t n)
 254 {
 255   const char* ra = a;
 256   const char* rb = b;
 257   int left = n;
 258   if (!left--)
 259     return 0;
 260   while (ToLower(*ra) == ToLower(*rb)) {
 261     if (!*ra++ || !left--)
 262       return 0;
 263     else
 264       ++rb;
 265   }
 266   return (*ra - *rb);
 267 }
 268
 269 /*
 270  * unique_name_vector - create a unique vector of names from
 271  * a token separated list
 272  * list   - [in]  a token delimited null terminated character array
 273  * token  - [in]  the token to replace
 274  * vector - [out] vector of strings to be returned
 275  * size   - [in]  maximum number of elements to place in vector
 276  * Returns count of elements placed into the vector, if the list
 277  * is an empty string { '\0' } 0 is returned.
 278  * list, and vector must be non-null and size must be > 0
 279  * Empty strings <token><token> are not placed in the vector or counted.
 280  * This function ignores all subsequent tokens when count == size
 281  *
 282  * NOTE: this function destroys it's input, do not use list after it
 283  * is passed to this function
 284  */
 285 int unique_name_vector(char* list, char token, char** vector, int size)
 286 {
 287   int   i;
 288   int   count = 0;
 289   char* start = list;
 290   char* end;
 291
 292   assert(0 != list);
 293   assert(0 != vector);
 294   assert(0 < size);
 295
 296   /*
 297    * ignore spurious tokens
 298    */
 299   while (token == *start)
 300     ++start;
 301
 302   for (end = strchr(start, token); end; end = strchr(start, token)) {
 303     *end++ = '\0';
 304     /*
 305      * ignore spurious tokens
 306      */
 307     while (token == *end)
 308       ++end;
 309     for (i = 0; i < count; ++i) {
 310       if (0 == ircd_strcmp(vector[i], start))
 311         break;
 312     }
 313     if (i == count) {
 314       vector[count++] = start;
 315       if (count == size)
 316         return count;
 317     }
 318     start = end;
 319   }
 320   if (*start) {
 321     for (i = 0; i < count; ++i)
 322       if (0 == ircd_strcmp(vector[i], start))
 323         return count;
 324     vector[count++] = start;
 325   }
 326   return count;
 327 }
 328
 329 /*
 330  * token_vector - create a vector of tokens from
 331  * a token separated list
 332  * list   - [in]  a token delimited null terminated character array
 333  * token  - [in]  the token to replace
 334  * vector - [out] vector of strings to be returned
 335  * size   - [in]  maximum number of elements to place in vector
 336  * returns count of elements placed into the vector, if the list
 337  * is an empty string { '\0' } 0 is returned.
 338  * list, and vector must be non-null and size must be > 1
 339  * Empty tokens are counted and placed in the list
 340  *
 341  * NOTE: this function destroys it's input, do not use list after it
 342  * is passed to this function
 343  */
 344 int token_vector(char* list, char token, char** vector, int size)
 345 {
 346   int   count = 0;
 347   char* start = list;
 348   char* end;
 349
 350   assert(0 != list);
 351   assert(0 != vector);
 352   assert(1 < size);
 353
 354   vector[count++] = start;
 355   for (end = strchr(start, token); end; end = strchr(start, token)) {
 356     *end++ = '\0';
 357     start = end;
 358     if (*start) {
 359       vector[count++] = start;
 360       if (count < size)
 361         continue;
 362     }
 363     break;
 364   }
 365   return count;
 366 }
 367
 368 /*
 369  * host_from_uh - get the host.domain part of a user@host.domain string
 370  * ripped from get_sockhost
 371  */
 372 char* host_from_uh(char* host, const char* userhost, size_t n)
 373 {
 374   const char* s;
 375
 376   assert(0 != host);
 377   assert(0 != userhost);
 378
 379   if ((s = strchr(userhost, '@')))
 380     ++s;
 381   else
 382     s = userhost;
 383   ircd_strncpy(host, s, n);
 384   host[n] = '\0';
 385   return host;
 386 }
 387
 388 /*
 389  * this new faster inet_ntoa was ripped from:
 390  * From: Thomas Helvey <tomh@inxpress.net>
 391  */
 392 static const char* IpQuadTab[] =
 393 {
 394     "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",
 395    "10",  "11",  "12",  "13",  "14",  "15",  "16",  "17",  "18",  "19",
 396    "20",  "21",  "22",  "23",  "24",  "25",  "26",  "27",  "28",  "29",
 397    "30",  "31",  "32",  "33",  "34",  "35",  "36",  "37",  "38",  "39",
 398    "40",  "41",  "42",  "43",  "44",  "45",  "46",  "47",  "48",  "49",
 399    "50",  "51",  "52",  "53",  "54",  "55",  "56",  "57",  "58",  "59",
 400    "60",  "61",  "62",  "63",  "64",  "65",  "66",  "67",  "68",  "69",
 401    "70",  "71",  "72",  "73",  "74",  "75",  "76",  "77",  "78",  "79",
 402    "80",  "81",  "82",  "83",  "84",  "85",  "86",  "87",  "88",  "89",
 403    "90",  "91",  "92",  "93",  "94",  "95",  "96",  "97",  "98",  "99",
 404   "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
 405   "110", "111", "112", "113", "114", "115", "116", "117", "118", "119",
 406   "120", "121", "122", "123", "124", "125", "126", "127", "128", "129",
 407   "130", "131", "132", "133", "134", "135", "136", "137", "138", "139",
 408   "140", "141", "142", "143", "144", "145", "146", "147", "148", "149",
 409   "150", "151", "152", "153", "154", "155", "156", "157", "158", "159",
 410   "160", "161", "162", "163", "164", "165", "166", "167", "168", "169",
 411   "170", "171", "172", "173", "174", "175", "176", "177", "178", "179",
 412   "180", "181", "182", "183", "184", "185", "186", "187", "188", "189",
 413   "190", "191", "192", "193", "194", "195", "196", "197", "198", "199",
 414   "200", "201", "202", "203", "204", "205", "206", "207", "208", "209",
 415   "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
 416   "220", "221", "222", "223", "224", "225", "226", "227", "228", "229",
 417   "230", "231", "232", "233", "234", "235", "236", "237", "238", "239",
 418   "240", "241", "242", "243", "244", "245", "246", "247", "248", "249",
 419   "250", "251", "252", "253", "254", "255"
 420 };
 421
 422 /*
 423  * ircd_ntoa - rewrote and renamed yet again :) --Bleep
 424  * inetntoa - in_addr to string
 425  *      changed name to remove collision possibility and
 426  *      so behaviour is guaranteed to take a pointer arg.
 427  *      -avalon 23/11/92
 428  *  inet_ntoa --  returned the dotted notation of a given
 429  *      internet number
 430  *      argv 11/90).
 431  *  inet_ntoa --  its broken on some Ultrix/Dynix too. -avalon
 432  */
 433 const char* ircd_ntoa(const char* in)
 434 {
 435   static char buf[20];
 436   return ircd_ntoa_r(buf, in);
 437 }
 438
 439 /*
 440  * reentrant version of above
 441  */
 442 const char* ircd_ntoa_r(char* buf, const char* in)
 443 {
 444   char*                p = buf;
 445   const unsigned char* a = (const unsigned char*)in;
 446   const char*          n;
 447
 448   assert(0 != buf);
 449   assert(0 != in);
 450
 451   n = IpQuadTab[*a++];
 452   while ((*p = *n++))
 453     ++p;
 454   *p++ = '.';
 455   n = IpQuadTab[*a++];
 456   while ((*p = *n++))
 457     ++p;
 458   *p++ = '.';
 459   n = IpQuadTab[*a++];
 460   while ((*p = *n++))
 461     ++p;
 462   *p++ = '.';
 463   n = IpQuadTab[*a];
 464   while ((*p = *n++))
 465     ++p;
 466   return buf;
 467 }
 468
 469