ircd/ircd_string.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/ircd_string.c
   3  * Copyright (C) 1999 Thomas Helvey
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 1, or (at your option)
   8  * any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18  *
  19  * $Id$
  20  */
  21 #include "ircd_string.h"
  22 #include "ircd_defs.h"
  23 #include "ircd_chattr.h"
  24 #include "ircd_log.h"
  25 #include <assert.h>
  26 #include <string.h>
  27 #include <regex.h>
  28 /*
  29  * include the character attribute tables here
  30  */
  31 #include "chattr.tab.c"
  32
  33
  34 /*
  35  * Disallow a hostname label to contain anything but a [-a-zA-Z0-9].
  36  * It may not start or end on a '.'.
  37  * A label may not end on a '-', the maximum length of a label is
  38  * 63 characters.
  39  * On top of that (which seems to be the RFC) we demand that the
  40  * top domain does not contain any digits.
  41  */
  42 static const char* hostExpr = "^([-0-9A-Za-z]*[0-9A-Za-z]\\.)+[A-Za-z]+$";
  43 static regex_t hostRegex;
  44
  45 static const char* addrExpr =
  46     "^((25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\\.){1,3}(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])$";
  47 static regex_t addrRegex;
  48
  49 int init_string(void)
  50 {
  51   /*
  52    * initialize matching expressions
  53    * XXX - expressions MUST be correct, don't change expressions
  54    * without testing them. Might be a good idea to exit if these fail,
  55    * important code depends on them.
  56    * TODO: use regerror for an error message
  57    */
  58   if (regcomp(&hostRegex, hostExpr, REG_EXTENDED | REG_NOSUB))
  59     return 0;
  60
  61   if (regcomp(&addrRegex, addrExpr, REG_EXTENDED | REG_NOSUB))
  62     return 0;
  63   return 1;
  64 }
  65
  66 int string_is_hostname(const char* str)
  67 {
  68   assert(0 != str);
  69   return (strlen(str) <= HOSTLEN && 0 == regexec(&hostRegex, str, 0, 0, 0));
  70 }
  71
  72 int string_is_address(const char* str)
  73 {
  74   assert(0 != str);
  75   return (0 == regexec(&addrRegex, str, 0, 0, 0));
  76 }
  77
  78 /*
  79  * strtoken.c
  80  *
  81  * Walk through a string of tokens, using a set of separators.
  82  * -argv 9/90
  83  */
  84 char* ircd_strtok(char **save, char *str, char *fs)
  85 {
  86   char *pos = *save;            /* keep last position across calls */
  87   char *tmp;
  88
  89   if (str)
  90     pos = str;                  /* new string scan */
  91
  92   while (pos && *pos && strchr(fs, *pos) != NULL)
  93     pos++;                      /* skip leading separators */
  94
  95   if (!pos || !*pos)
  96     return (pos = *save = NULL);        /* string contains only sep's */
  97
  98   tmp = pos;                    /* now, keep position of the token */
  99
 100   while (*pos && strchr(fs, *pos) == NULL)
 101     pos++;                      /* skip content of the token */
 102
 103   if (*pos)
 104     *pos++ = '\0';              /* remove first sep after the token */
 105   else
 106     pos = NULL;                 /* end of string */
 107
 108   *save = pos;
 109   return (tmp);
 110 }
 111
 112 /*
 113  * canonize
 114  *
 115  * reduce a string of duplicate list entries to contain only the unique
 116  * items.  Unavoidably O(n^2).
 117  */
 118 char* canonize(char* buffer)
 119 {
 120   static char cbuf[BUFSIZE];
 121   char*       s;
 122   char*       t;
 123   char*       cp = cbuf;
 124   int         l = 0;
 125   char*       p = NULL;
 126   char*       p2;
 127
 128   *cp = '\0';
 129
 130   for (s = ircd_strtok(&p, buffer, ","); s; s = ircd_strtok(&p, NULL, ","))
 131   {
 132     if (l)
 133     {
 134       p2 = NULL;
 135       for (t = ircd_strtok(&p2, cbuf, ","); t; t = ircd_strtok(&p2, NULL, ","))
 136         if (0 == ircd_strcmp(s, t))
 137           break;
 138         else if (p2)
 139           p2[-1] = ',';
 140     }
 141     else
 142       t = NULL;
 143     if (!t)
 144     {
 145       if (l)
 146         *(cp - 1) = ',';
 147       else
 148         l = 1;
 149       strcpy(cp, s);
 150       if (p)
 151         cp += (p - s);
 152     }
 153     else if (p2)
 154       p2[-1] = ',';
 155   }
 156   return cbuf;
 157 }
 158
 159 /*
 160  * ircd_strncpy - optimized strncpy
 161  * This may not look like it would be the fastest possible way to do it,
 162  * but it generally outperforms everything else on many platforms,
 163  * including asm library versions and memcpy, if compiled with the
 164  * optimizer on. (-O2 for gcc) --Bleep
 165  */
 166 char* ircd_strncpy(char* s1, const char* s2, size_t n)
 167 {
 168   char* endp = s1 + n;
 169   char* s = s1;
 170
 171   assert(0 != s1);
 172   assert(0 != s2);
 173
 174   while (s < endp && (*s++ = *s2++))
 175     ;
 176   return s1;
 177 }
 178
 179
 180 #ifndef FORCEINLINE
 181 NTL_HDR_strChattr { NTL_SRC_strChattr }
 182 NTL_HDR_strCasediff { NTL_SRC_strCasediff }
 183 #endif /* !FORCEINLINE */
 184
 185 /*
 186  * Other functions visible externally
 187  */
 188
 189 int strnChattr(const char *s, size_t n)
 190 {
 191   const char *rs = s;
 192   unsigned int x = ~0;
 193   int r = n;
 194   while (*rs && r--)
 195     x &= IRCD_CharAttrTab[*rs++ - CHAR_MIN];
 196   return x;
 197 }
 198
 199 /*
 200  * ircd_strcmp - case insensitive comparison of 2 strings
 201  * NOTE: see ircd_chattr.h for notes on case mapping.
 202  */
 203 int ircd_strcmp(const char *a, const char *b)
 204 {
 205   const char* ra = a;
 206   const char* rb = b;
 207   while (ToLower(*ra) == ToLower(*rb)) {
 208     if (!*ra++)
 209       return 0;
 210     else
 211       ++rb;
 212   }
 213   return (*ra - *rb);
 214 }
 215
 216 /*
 217  * ircd_strncmp - counted case insensitive comparison of 2 strings
 218  * NOTE: see ircd_chattr.h for notes on case mapping.
 219  */
 220 int ircd_strncmp(const char *a, const char *b, size_t n)
 221 {
 222   const char* ra = a;
 223   const char* rb = b;
 224   int left = n;
 225   if (!left--)
 226     return 0;
 227   while (ToLower(*ra) == ToLower(*rb)) {
 228     if (!*ra++ || !left--)
 229       return 0;
 230     else
 231       ++rb;
 232   }
 233   return (*ra - *rb);
 234 }
 235
 236 /*
 237  * unique_name_vector - create a unique vector of names from
 238  * a token separated list
 239  * list   - [in]  a token delimited null terminated character array
 240  * token  - [in]  the token to replace
 241  * vector - [out] vector of strings to be returned
 242  * size   - [in]  maximum number of elements to place in vector
 243  * Returns count of elements placed into the vector, if the list
 244  * is an empty string { '\0' } 0 is returned.
 245  * list, and vector must be non-null and size must be > 0
 246  * Empty strings <token><token> are not placed in the vector or counted.
 247  * This function ignores all subsequent tokens when count == size
 248  *
 249  * NOTE: this function destroys it's input, do not use list after it
 250  * is passed to this function
 251  */
 252 int unique_name_vector(char* list, char token, char** vector, int size)
 253 {
 254   int   i;
 255   int   count = 0;
 256   char* start = list;
 257   char* end;
 258
 259   assert(0 != list);
 260   assert(0 != vector);
 261   assert(0 < size);
 262
 263   /*
 264    * ignore spurious tokens
 265    */
 266   while (token == *start)
 267     ++start;
 268
 269   for (end = strchr(start, token); end; end = strchr(start, token)) {
 270     *end++ = '\0';
 271     /*
 272      * ignore spurious tokens
 273      */
 274     while (token == *end)
 275       ++end;
 276     for (i = 0; i < count; ++i) {
 277       if (0 == ircd_strcmp(vector[i], start))
 278         break;
 279     }
 280     if (i == count) {
 281       vector[count++] = start;
 282       if (count == size)
 283         return count;
 284     }
 285     start = end;
 286   }
 287   if (*start)
 288     vector[count++] = start;
 289
 290   return count;
 291 }
 292
 293 /*
 294  * token_vector - create a vector of tokens from
 295  * a token separated list
 296  * list   - [in]  a token delimited null terminated character array
 297  * token  - [in]  the token to replace
 298  * vector - [out] vector of strings to be returned
 299  * size   - [in]  maximum number of elements to place in vector
 300  * returns count of elements placed into the vector, if the list
 301  * is an empty string { '\0' } 0 is returned.
 302  * list, and vector must be non-null and size must be > 1
 303  * Empty tokens are counted and placed in the list
 304  *
 305  * NOTE: this function destroys it's input, do not use list after it
 306  * is passed to this function
 307  */
 308 int token_vector(char* list, char token, char** vector, int size)
 309 {
 310   int   count = 0;
 311   char* start = list;
 312   char* end;
 313
 314   assert(0 != list);
 315   assert(0 != vector);
 316   assert(1 < size);
 317
 318   vector[count++] = start;
 319   for (end = strchr(start, token); end; end = strchr(start, token)) {
 320     *end++ = '\0';
 321     start = end;
 322     if (*start) {
 323       vector[count++] = start;
 324       if (count < size)
 325         continue;
 326     }
 327     break;
 328   }
 329   return count;
 330 }
 331
 332 /*
 333  * host_from_uh - get the host.domain part of a user@host.domain string
 334  * ripped from get_sockhost
 335  */
 336 char* host_from_uh(char* host, const char* userhost, size_t n)
 337 {
 338   const char* s;
 339
 340   assert(0 != host);
 341   assert(0 != userhost);
 342
 343   if ((s = strchr(userhost, '@')))
 344     ++s;
 345   else
 346     s = userhost;
 347   ircd_strncpy(host, s, n);
 348   host[n] = '\0';
 349   return host;
 350 }
 351
 352 /*
 353  * this new faster inet_ntoa was ripped from:
 354  * From: Thomas Helvey <tomh@inxpress.net>
 355  */
 356 static const char* IpQuadTab[] =
 357 {
 358     "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",
 359    "10",  "11",  "12",  "13",  "14",  "15",  "16",  "17",  "18",  "19",
 360    "20",  "21",  "22",  "23",  "24",  "25",  "26",  "27",  "28",  "29",
 361    "30",  "31",  "32",  "33",  "34",  "35",  "36",  "37",  "38",  "39",
 362    "40",  "41",  "42",  "43",  "44",  "45",  "46",  "47",  "48",  "49",
 363    "50",  "51",  "52",  "53",  "54",  "55",  "56",  "57",  "58",  "59",
 364    "60",  "61",  "62",  "63",  "64",  "65",  "66",  "67",  "68",  "69",
 365    "70",  "71",  "72",  "73",  "74",  "75",  "76",  "77",  "78",  "79",
 366    "80",  "81",  "82",  "83",  "84",  "85",  "86",  "87",  "88",  "89",
 367    "90",  "91",  "92",  "93",  "94",  "95",  "96",  "97",  "98",  "99",
 368   "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
 369   "110", "111", "112", "113", "114", "115", "116", "117", "118", "119",
 370   "120", "121", "122", "123", "124", "125", "126", "127", "128", "129",
 371   "130", "131", "132", "133", "134", "135", "136", "137", "138", "139",
 372   "140", "141", "142", "143", "144", "145", "146", "147", "148", "149",
 373   "150", "151", "152", "153", "154", "155", "156", "157", "158", "159",
 374   "160", "161", "162", "163", "164", "165", "166", "167", "168", "169",
 375   "170", "171", "172", "173", "174", "175", "176", "177", "178", "179",
 376   "180", "181", "182", "183", "184", "185", "186", "187", "188", "189",
 377   "190", "191", "192", "193", "194", "195", "196", "197", "198", "199",
 378   "200", "201", "202", "203", "204", "205", "206", "207", "208", "209",
 379   "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
 380   "220", "221", "222", "223", "224", "225", "226", "227", "228", "229",
 381   "230", "231", "232", "233", "234", "235", "236", "237", "238", "239",
 382   "240", "241", "242", "243", "244", "245", "246", "247", "248", "249",
 383   "250", "251", "252", "253", "254", "255"
 384 };
 385
 386 /*
 387  * ircd_ntoa - rewrote and renamed yet again :) --Bleep
 388  * inetntoa - in_addr to string
 389  *      changed name to remove collision possibility and
 390  *      so behaviour is guaranteed to take a pointer arg.
 391  *      -avalon 23/11/92
 392  *  inet_ntoa --  returned the dotted notation of a given
 393  *      internet number
 394  *      argv 11/90).
 395  *  inet_ntoa --  its broken on some Ultrix/Dynix too. -avalon
 396  */
 397 const char* ircd_ntoa(const char* in)
 398 {
 399   static char buf[20];
 400   return ircd_ntoa_r(buf, in);
 401 }
 402
 403 /*
 404  * reentrant version of above
 405  */
 406 const char* ircd_ntoa_r(char* buf, const char* in)
 407 {
 408   char*                p = buf;
 409   const unsigned char* a = (const unsigned char*)in;
 410   const char*          n;
 411
 412   assert(0 != buf);
 413   assert(0 != in);
 414
 415   n = IpQuadTab[*a++];
 416   while ((*p = *n++))
 417     ++p;
 418   *p++ = '.';
 419   n = IpQuadTab[*a++];
 420   while ((*p = *n++))
 421     ++p;
 422   *p++ = '.';
 423   n = IpQuadTab[*a++];
 424   while ((*p = *n++))
 425     ++p;
 426   *p++ = '.';
 427   n = IpQuadTab[*a];
 428   while ((*p = *n++))
 429     ++p;
 430   return buf;
 431 }
 432
 433