2 * IRC - Internet Relay Chat, ircd/ircd_string.c
3 * Copyright (C) 1999 Thomas Helvey
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 1, or (at your option)
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include "ircd_string.h"
24 #include "ircd_defs.h"
25 #include "ircd_chattr.h"
32 #include <sys/types.h>
33 #include <netinet/in.h>
36 * include the character attribute tables here
38 #include "chattr.tab.c"
42 * Disallow a hostname label to contain anything but a [-a-zA-Z0-9].
43 * It may not start or end on a '.'.
44 * A label may not end on a '-', the maximum length of a label is
46 * On top of that (which seems to be the RFC) we demand that the
47 * top domain does not contain any digits.
/*
 * hostExpr: one or more dot-terminated labels built from letters, digits
 * and '-' (the last character of each label must be a letter or digit),
 * followed by a final label made of letters only.
 * hostRegex: storage for the compiled form of hostExpr.
 */
49 static const char* hostExpr = "^([-0-9A-Za-z]*[0-9A-Za-z]\\.)+[A-Za-z]+$";
50 static regex_t hostRegex;
/*
 * addrExpr: two to four decimal components, each in the range 0-255,
 * separated by single dots.
 * addrRegex: storage for the compiled form of addrExpr.
 */
52 static const char* addrExpr =
53 "^((25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\\.){1,3}"
54 "(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])$";
55 static regex_t addrRegex;
60 * initialize matching expressions
61 * XXX - expressions MUST be correct, don't change expressions
62 * without testing them. Might be a good idea to exit if these fail,
63 * important code depends on them.
64 * TODO: use regerror for an error message
/*
 * Compile the hostname pattern. REG_EXTENDED selects POSIX extended syntax;
 * REG_NOSUB means only match / no match is reported. regcomp() returns
 * non-zero when compilation fails.
 */
66 if (regcomp(&hostRegex, hostExpr, REG_EXTENDED | REG_NOSUB))
/* Same for the numeric address pattern. */
69 if (regcomp(&addrRegex, addrExpr, REG_EXTENDED | REG_NOSUB))
/*
 * string_is_hostname - check str against the hostname pattern
 * str - [in] NUL terminated string to test
 * Returns non-zero when str is no longer than HOSTLEN characters and
 * matches hostRegex, 0 otherwise.
 */
74 int string_is_hostname(const char* str)
/* the length test comes first, so over-long strings never reach regexec() */
77 return (strlen(str) <= HOSTLEN && 0 == regexec(&hostRegex, str, 0, 0, 0));
/*
 * string_is_address - check str against the numeric address pattern
 * str - [in] NUL terminated string to test
 * Returns non-zero when str matches addrRegex, 0 otherwise.
 */
80 int string_is_address(const char* str)
/* regexec() returns 0 on a match */
83 return (0 == regexec(&addrRegex, str, 0, 0, 0));
/*
 * string_has_wildcards - look for the wildcard characters '*' and '?'
 * str - [in] NUL terminated string to scan
 * NOTE(review): presumably returns non-zero when one is found - confirm.
 */
86 int string_has_wildcards(const char* str)
/* visit every character up to the terminating NUL */
89 for ( ; *str; ++str) {
/* '*' and '?' are the characters this scan is looking for */
94 else if ('*' == *str || '?' == *str)
/*
 * hash_pjw - compute an unsigned hash value for a string
 * str - [in] NUL terminated string to hash
 */
100 unsigned int hash_pjw(const char* str)
/* one round per character */
106 for ( ; *str; ++str) {
/* g receives the top four bits of h; the branch runs only when any is set */
108 if ((g = h & 0xf0000000)) {
109 h ^= g >> 24; /* fold top four bits onto ------X- */
110 h ^= g; /* clear top four bits */
119 * Walk through a string of tokens, using a set of separators.
/*
 * save - [in/out] scan position carried from one call to the next
 * str  - [in] string to scan; callers in this file pass NULL on follow-up
 *        calls to continue with the position held in *save
 * fs   - [in] set of separator characters
 * The string is modified in place: the separator that ends a token is
 * overwritten with '\0'.
 */
122 char* ircd_strtok(char **save, char *str, char *fs)
124 char *pos = *save; /* keep last position across calls */
128 pos = str; /* new string scan */
130 while (pos && *pos && strchr(fs, *pos) != NULL)
131 pos++; /* skip leading separators */
/* nothing left to hand out: the caller's saved position is cleared too */
134 return (pos = *save = NULL); /* string contains only sep's */
136 tmp = pos; /* now, keep position of the token */
138 while (*pos && strchr(fs, *pos) == NULL)
139 pos++; /* skip content of the token */
142 *pos++ = '\0'; /* remove first sep after the token */
144 pos = NULL; /* end of string */
153 * reduce a string of duplicate list entries to contain only the unique
154 * items. Unavoidably O(n^2).
/*
 * buffer - [in] comma separated list; it is tokenized with ircd_strtok(),
 *          which overwrites separators in place
 * Entries are compared with ircd_strcmp(), i.e. without regard to case.
 */
156 char* canonize(char* buffer)
/* static: a single work area whose contents persist between calls */
158 static char cbuf[BUFSIZE];
/* outer scan: each entry s of the caller's list */
168 for (s = ircd_strtok(&p, buffer, ","); s; s = ircd_strtok(&p, NULL, ","))
/* inner scan: compare s with each entry t currently held in cbuf */
173 for (t = ircd_strtok(&p2, cbuf, ","); t; t = ircd_strtok(&p2, NULL, ","))
174 if (0 == ircd_strcmp(s, t))
198 * ircd_strncpy - optimized strncpy
199 * This may not look like it would be the fastest possible way to do it,
200 * but it generally outperforms everything else on many platforms,
201 * including asm library versions and memcpy, if compiled with the
202 * optimizer on. (-O2 for gcc) --Bleep
/*
 * s1 - [out] destination buffer
 * s2 - [in] NUL terminated source string
 * n  - [in] limit used for the copy
 */
204 char* ircd_strncpy(char* s1, const char* s2, size_t n)
/*
 * Copy one character per iteration; the loop ends either right after the
 * terminating NUL has been copied or when s reaches endp. In the second
 * case this loop has not stored a terminating NUL.
 */
212 while (s < endp && (*s++ = *s2++))
/*
 * Out-of-line definitions of strChattr and strCasediff: each NTL_HDR_*
 * macro supplies what precedes the braces and each NTL_SRC_* macro the
 * text inside them (macros presumably come from ircd_chattr.h - confirm).
 * Per the #endif below they are compiled only when FORCEINLINE is not
 * defined.
 */
219 NTL_HDR_strChattr { NTL_SRC_strChattr }
220 NTL_HDR_strCasediff { NTL_SRC_strCasediff }
221 #endif /* !FORCEINLINE */
224 * Other functions visible externally
/*
 * strnChattr - combine the attribute bits of the characters of s
 * s - [in] string to examine
 * n - [in] presumably the maximum number of characters examined - confirm
 */
227 int strnChattr(const char *s, size_t n)
/*
 * AND the table entry for the current character into x, so x keeps only
 * the bits set for every character seen so far. Subtracting CHAR_MIN turns
 * a possibly negative char into a non-negative table index.
 */
233 x &= IRCD_CharAttrTab[*rs++ - CHAR_MIN];
238 * ircd_strcmp - case insensitive comparison of 2 strings
239 * NOTE: see ircd_chattr.h for notes on case mapping.
/*
 * a, b - [in] NUL terminated strings to compare
 * Callers in this file treat a return value of 0 as "equal".
 */
241 int ircd_strcmp(const char *a, const char *b)
/* walk both strings for as long as the characters match after ToLower() */
245 while (ToLower(*ra) == ToLower(*rb)) {
255 * ircd_strncmp - counted case insensitive comparison of 2 strings
256 * NOTE: see ircd_chattr.h for notes on case mapping.
/*
 * a, b - [in] strings to compare
 * n    - [in] comparison count
 */
258 int ircd_strncmp(const char *a, const char *b, size_t n)
/* walk both strings for as long as the characters match after ToLower() */
265 while (ToLower(*ra) == ToLower(*rb)) {
/* stop once the end of a has been passed or the count in left runs out */
266 if (!*ra++ || !left--)
275 * unique_name_vector - create a unique vector of names from
276 * a token separated list
277 * list - [in] a token delimited null terminated character array
278 * token - [in] the token to replace
279 * vector - [out] vector of strings to be returned
280 * size - [in] maximum number of elements to place in vector
281 * Returns count of elements placed into the vector, if the list
282 * is an empty string { '\0' } 0 is returned.
283 * list, and vector must be non-null and size must be > 0
284 * Empty strings <token><token> are not placed in the vector or counted.
285 * This function ignores all subsequent tokens when count == size
287 * NOTE: this function destroys its input, do not use list after it
288 * is passed to this function
/*
 * Names are compared with ircd_strcmp(), so entries differing only in case
 * count as duplicates. vector receives pointers into list itself; nothing
 * is copied.
 */
290 int unique_name_vector(char* list, char token, char** vector, int size)
302 * ignore spurious tokens
/* skip separators at the very start of the list */
304 while (token == *start)
/* each pass handles the name that ends at the next separator */
307 for (end = strchr(start, token); end; end = strchr(start, token)) {
310 * ignore spurious tokens
/* skip a run of consecutive separators */
312 while (token == *end)
/* search the names stored so far for a match */
314 for (i = 0; i < count; ++i) {
315 if (0 == ircd_strcmp(vector[i], start))
/* record the name by pointing at it inside list */
319 vector[count++] = start;
/* same duplicate search for the text after the last separator */
326 for (i = 0; i < count; ++i)
327 if (0 == ircd_strcmp(vector[i], start))
329 vector[count++] = start;
335 * token_vector - create a vector of tokens from
336 * a token separated list
337 * list - [in] a token delimited null terminated character array
338 * token - [in] the token to replace
339 * vector - [out] vector of strings to be returned
340 * size - [in] maximum number of elements to place in vector
341 * returns count of elements placed into the vector, if the list
342 * is an empty string { '\0' } 0 is returned.
343 * list, and vector must be non-null and size must be > 1
344 * Empty tokens are counted and placed in the list
346 * NOTE: this function destroys its input, do not use list after it
347 * is passed to this function
/* vector receives pointers into list itself; nothing is copied */
349 int token_vector(char* list, char token, char** vector, int size)
/* the start of the list becomes the first element */
359 vector[count++] = start;
/* each pass looks for the next separator at or after start */
360 for (end = strchr(start, token); end; end = strchr(start, token)) {
364 vector[count++] = start;
374 * host_from_uh - get the host.domain part of a user@host.domain string
375 * ripped from get_sockhost
/*
 * host     - [out] buffer receiving the host part
 * userhost - [in] "user@host" style string; must not be NULL
 * n        - [in] limit handed to ircd_strncpy()
 */
377 char* host_from_uh(char* host, const char* userhost, size_t n)
382 assert(0 != userhost);
/* locate the first '@' in userhost, if there is one */
384 if ((s = strchr(userhost, '@')))
/* copy starting at s, bounded by n */
388 ircd_strncpy(host, s, n);
394 * this new faster inet_ntoa was ripped from:
395 * From: Thomas Helvey <tomh@inxpress.net>
/*
 * IpQuadTab - decimal text for every byte value 0-255, indexed by the byte.
 * ircd_ntoa_r() looks each IPv4 octet up here and copies the text with
 * memcpy() instead of formatting the number.
 */
397 static const char* IpQuadTab[] =
399 "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
400 "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
401 "20", "21", "22", "23", "24", "25", "26", "27", "28", "29",
402 "30", "31", "32", "33", "34", "35", "36", "37", "38", "39",
403 "40", "41", "42", "43", "44", "45", "46", "47", "48", "49",
404 "50", "51", "52", "53", "54", "55", "56", "57", "58", "59",
405 "60", "61", "62", "63", "64", "65", "66", "67", "68", "69",
406 "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
407 "80", "81", "82", "83", "84", "85", "86", "87", "88", "89",
408 "90", "91", "92", "93", "94", "95", "96", "97", "98", "99",
409 "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
410 "110", "111", "112", "113", "114", "115", "116", "117", "118", "119",
411 "120", "121", "122", "123", "124", "125", "126", "127", "128", "129",
412 "130", "131", "132", "133", "134", "135", "136", "137", "138", "139",
413 "140", "141", "142", "143", "144", "145", "146", "147", "148", "149",
414 "150", "151", "152", "153", "154", "155", "156", "157", "158", "159",
415 "160", "161", "162", "163", "164", "165", "166", "167", "168", "169",
416 "170", "171", "172", "173", "174", "175", "176", "177", "178", "179",
417 "180", "181", "182", "183", "184", "185", "186", "187", "188", "189",
418 "190", "191", "192", "193", "194", "195", "196", "197", "198", "199",
419 "200", "201", "202", "203", "204", "205", "206", "207", "208", "209",
420 "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
421 "220", "221", "222", "223", "224", "225", "226", "227", "228", "229",
422 "230", "231", "232", "233", "234", "235", "236", "237", "238", "239",
423 "240", "241", "242", "243", "244", "245", "246", "247", "248", "249",
424 "250", "251", "252", "253", "254", "255"
428 * ircd_ntoa - rewrote and renamed yet again :) --Bleep
429 * inetntoa - in_addr to string
430 * changed name to remove collision possibility and
431 * so behaviour is guaranteed to take a pointer arg.
433 * inet_ntoa -- returned the dotted notation of a given
436 * inet_ntoa -- it's broken on some Ultrix/Dynix too. -avalon
438 const char* ircd_ntoa(const struct irc_in_addr* in)
440 static char buf[SOCKIPLEN];
441 return ircd_ntoa_r(buf, in);
444 /* This doesn't really belong here, but otherwise umkpasswd breaks. */
445 int irc_in_addr_is_ipv4(const struct irc_in_addr *addr)
447 return addr->in6_16[0] == 0
448 && addr->in6_16[1] == 0
449 && addr->in6_16[2] == 0
450 && addr->in6_16[3] == 0
451 && addr->in6_16[4] == 0
452 && (addr->in6_16[5] == 0 || addr->in6_16[5] == 0xffff)
453 && addr->in6_16[6] != 0;
457 * reentrant version of above
/*
 * ircd_ntoa_r - format an irc_in_addr as text into a caller supplied buffer
 * buf - [out] destination for the NUL terminated text
 * in  - [in] address to format
 * Addresses accepted by irc_in_addr_is_ipv4() are rendered from the decimal
 * text of four successive bytes (via IpQuadTab); all others are rendered
 * from the hexadecimal digits of their in6_16 words.
 */
459 const char* ircd_ntoa_r(char* buf, const struct irc_in_addr* in)
464 if (irc_in_addr_is_ipv4(in)) {
465 unsigned int pos, len;
/* read the address one byte at a time, starting at word 6 */
468 pch = (unsigned char*)&in->in6_16[6];
/* each byte indexes IpQuadTab; its text is copied without the NUL */
469 len = strlen(IpQuadTab[*pch]);
470 memcpy(buf, IpQuadTab[*pch++], len);
473 len = strlen(IpQuadTab[*pch]);
474 memcpy(buf+pos, IpQuadTab[*pch++], len);
477 len = strlen(IpQuadTab[*pch]);
478 memcpy(buf+pos, IpQuadTab[*pch++], len);
481 len = strlen(IpQuadTab[*pch]);
482 memcpy(buf+pos, IpQuadTab[*pch++], len);
/* terminate right after the text of the fourth byte */
483 buf[pos + len] = '\0';
486 static const char hexdigits[] = "0123456789abcdef";
487 unsigned int pos, part, max_start, max_zeros, curr_zeros, ii;
489 /* Find longest run of zeros. */
/* the scan starts at word 1, so word 0 never becomes part of the run */
490 for (max_start = ii = 1, max_zeros = curr_zeros = 0; ii < 8; ++ii) {
/* a longer run than the best so far: remember where it began */
493 else if (curr_zeros > max_zeros) {
494 max_start = ii - curr_zeros;
495 max_zeros = curr_zeros;
/* repeat the comparison after the loop for a run still open at its end */
499 if (curr_zeros > max_zeros) {
500 max_start = ii - curr_zeros;
501 max_zeros = curr_zeros;
504 /* Print out address. */
505 #define APPEND(CH) do { buf[pos++] = (CH); } while (0)
506 for (pos = ii = 0; (ii < 8); ++ii) {
/* reached the first word of the chosen zero run */
507 if ((max_zeros > 0) && (ii == max_start)) {
/* convert the word from network to host byte order before splitting it */
512 part = ntohs(in->in6_16[ii]);
/* one hex digit per nibble, most significant first */
514 APPEND(hexdigits[part >> 12]);
516 APPEND(hexdigits[(part >> 8) & 15]);
518 APPEND(hexdigits[(part >> 4) & 15]);
519 APPEND(hexdigits[part & 15]);
/* true when the zero run extends through word 7 */
523 if (max_zeros + max_start == 8)
527 /* Nul terminate the text. */
/*
 * ircd_aton_ip4 - parse a dotted decimal IPv4 address
 * input  - [in] text to parse
 * output - [out] receives htonl() of the assembled value once a component
 *          that follows three dots has been read and the next character is
 *          not a digit
 * Digits accumulate into the current component. Each '.' stores the
 * component just read into the next lower byte of the value, starting with
 * bits 31-24; a '.' directly followed by another '.' is detected as a
 * special case.
 */
534 ircd_aton_ip4(const char *input, unsigned int *output)
536 unsigned int dots = 0, pos = 0, part = 0, ip = 0;
538 /* Intentionally no support for bizarre IPv4 formats (plain
539 * integers, octal or hex components) -- only vanilla dotted
545 if (IsDigit(input[pos])) {
546 part = part * 10 + input[pos++] - '0';
549 if ((dots == 3) && !IsDigit(input[pos])) {
550 *output = htonl(ip | part);
553 } else if (input[pos] == '.') {
554 if (input[++pos] == '.')
556 ip |= part << (24 - 8 * dots++);
563 /* ircd_aton - Parse a numeric IPv4 or IPv6 address into an irc_in_addr.
564 * Returns number of characters used by address, or 0 if the address was
565 * unparseable or malformed.
/*
 * ip    - [out] cleared with memset() first, then filled with the parsed
 *         address
 * input - [in] text to parse
 * Hex digits are shifted into the current word four bits at a time; an
 * embedded dotted quad is delegated to ircd_aton_ip4() and stored as two
 * words (upper 16 bits first).
 */
568 ircd_aton(struct irc_in_addr *ip, const char *input)
575 memset(ip, 0, sizeof(*ip));
576 colon = strchr(input, ':');
577 dot = strchr(input, '.');
/*
 * Take the IPv6 path when the text contains a ':' and either no '.' at all
 * or a '.' only after the first ':'.
 */
579 if (colon && (!dot || (dot > colon))) {
/*
 * NOTE: this local `colon` is an index into ip->in6_16 (see the zero fill
 * further down) and hides the pointer of the same name assigned above.
 */
580 unsigned int part = 0, pos = 0, ii = 0, colon = 8;
581 const char *part_start = NULL;
583 /* Parse IPv6, possibly like ::127.0.0.1.
584 * This is pretty straightforward; the only trick is borrowed
585 * from Paul Vixie (BIND): when it sees a "::" continue as if
586 * it were a single ":", but note where it happened, and fill
587 * with zeros afterwards.
589 if (input[pos] == ':') {
590 if ((input[pos+1] != ':') || (input[pos+2] == ':'))
598 switch (input[pos]) {
599 case '0': case '1': case '2': case '3': case '4':
600 case '5': case '6': case '7': case '8': case '9':
601 chval = input[pos] - '0';
603 part = (part << 4) | chval;
608 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
609 chval = input[pos] - 'A' + 10;
611 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
612 chval = input[pos] - 'a' + 10;
615 part_start = input + ++pos;
616 if (input[pos] == '.')
618 ip->in6_16[ii++] = htons(part);
620 if (input[pos] == ':') {
630 len = ircd_aton_ip4(input + pos, &ip4);
631 if (!len || (ii > 6))
633 ip->in6_16[ii++] = htons(ntohl(ip4) >> 16);
634 ip->in6_16[ii++] = htons(ntohl(ip4) & 65535);
642 /* Shift stuff after "::" up and fill middle with zeros. */
643 ip->in6_16[ii++] = htons(part);
/* move the words stored from index `colon` onward to the end of the array */
644 for (jj = 0; jj < ii - colon; jj++)
645 ip->in6_16[7 - jj] = ip->in6_16[ii - jj - 1];
/* clear the 8 - ii words of the gap that starts at index `colon` */
646 for (jj = 0; jj < 8 - ii; jj++)
647 ip->in6_16[colon + jj] = 0;
/* whole input parsed as IPv4: words 6 and 7 get the upper and lower halves */
655 int len = ircd_aton_ip4(input, &addr);
657 ip->in6_16[6] = htons(ntohl(addr) >> 16);
658 ip->in6_16[7] = htons(ntohl(addr) & 65535);
662 return 0; /* parse failed */