/*
 * IRC - Internet Relay Chat, ircd/ircd_string.c
 * Copyright (C) 1999 Thomas Helvey
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 1, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/** @file
 * @brief Implementation of string operations.
 */
#include "ircd_string.h"
#include "ircd_defs.h"
#include "ircd_chattr.h"

#include <assert.h>
#include <limits.h>
#include <regex.h>
#include <string.h>
#include <sys/types.h>
#include <netinet/in.h>

/*
 * include the character attribute tables here
 */
#include "chattr.tab.c"
/*
 * Disallow a hostname label to contain anything but a [-a-zA-Z0-9].
 * It may not start or end on a '.'.
 * A label may not end on a '-', the maximum length of a label is
 * 63 characters (note that the expression below does not itself
 * limit the length of a label).
 * On top of that (which seems to be the RFC) we demand that the
 * top domain does not contain any digits.
 */

/** Regular expression to match a hostname.
 * Matches one or more labels, each ending in an alphanumeric
 * character and followed by '.', and then a final label that may
 * only contain alphabetic characters.
 */
static const char* hostExpr = "^([-0-9A-Za-z]*[0-9A-Za-z]\\.)+[A-Za-z]+$";
/** Compiled regex to match a hostname.  Built from #hostExpr. */
static regex_t hostRegex;

/** Regular expression to match an IP address.
 * Matches two to four dot-separated decimal components, each in the
 * range 0..255 and written without superfluous leading zeros.
 */
static const char* addrExpr =
    "^((25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\\.){1,3}"
    "(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])$";
/** Compiled regex to match an IP address.  Built from #addrExpr. */
static regex_t addrRegex;
/*
 * NOTE(review): this region is a numbered listing with lines missing.  The
 * function header, the braces, the comment delimiters and the statements
 * controlled by the two `if` tests are not visible here, so the text below
 * is kept verbatim rather than rewritten.
 *
 * What the visible statements do: hostExpr is compiled into hostRegex and
 * addrExpr into addrRegex, both as POSIX extended expressions with
 * REG_NOSUB (match / no-match only, no sub-match offsets).  regcomp()
 * returns non-zero on failure, so each `if` guards a failure path whose
 * body cannot be seen in this excerpt.  The existing TODO about regerror()
 * indicates that no diagnostic is produced on failure yet.
 */
66 /** Initialize the string matching code. */
70 * initialize matching expressions
71 * XXX - expressions MUST be correct, don't change expressions
72 * without testing them. Might be a good idea to exit if these fail,
73 * important code depends on them.
74 * TODO: use regerror for an error message
76 if (regcomp(&hostRegex, hostExpr, REG_EXTENDED | REG_NOSUB))
79 if (regcomp(&addrRegex, addrExpr, REG_EXTENDED | REG_NOSUB))
84 /** Check whether \a str looks like a hostname.
85 * @param[in] str String that might be a hostname.
86 * @return Non-zero if it conforms to the rules, zero if not.
88 int string_is_hostname(const char* str)
91 return (strlen(str) <= HOSTLEN && 0 == regexec(&hostRegex, str, 0, 0, 0));
94 /** Check whether \a str looks like an IP address.
95 * @param[in] str String that might be an address.
96 * @return Non-zero if it conforms to the rules, zero if not.
98 int string_is_address(const char* str)
101 return (0 == regexec(&addrRegex, str, 0, 0, 0));
/** Check whether \a str contains wildcard characters.
 * A backslash escapes the character that follows it, so "\*" and "\?"
 * do not count as wildcards.
 * @param[in] str String that might contain wildcards (must not be NULL).
 * @return Non-zero if \a str contains naked (non-escaped) wildcards,
 * zero if there are none or if they are all escaped.
 */
int string_has_wildcards(const char* str)
{
  assert(0 != str);
  for ( ; *str; ++str) {
    if ('\\' == *str) {
      /* Skip the escaped character; a trailing backslash ends the scan. */
      if ('\0' == *++str)
        break;
    }
    else if ('*' == *str || '?' == *str)
      return 1;
  }
  return 0;
}
/** Split a string on certain delimiters.
 * This is a reentrant version of normal strtok().  The first call for
 * a particular input string must use a non-NULL \a str; *save will be
 * initialized based on that (its previous value is not read).  Later
 * calls must use a NULL \a str; *save will be updated.
 * The input string is modified: the delimiter that ends each token is
 * overwritten with a NUL.
 * @param[in,out] save Pointer to a position indicator.
 * @param[in] str Pointer to the input string, or NULL to continue.
 * @param[in] fs String that lists token delimiters.
 * @return Next token in input string, or NULL if no tokens remain.
 */
char* ircd_strtok(char **save, char *str, char *fs)
{
  /* Only consult *save when continuing a scan, so that a caller may
   * pass an uninitialized position indicator on the first call. */
  char *pos = str ? str : *save;
  char *tmp;

  while (pos && *pos && strchr(fs, *pos) != NULL)
    pos++;                      /* skip leading separators */

  if (!pos || !*pos)
    return (*save = NULL);      /* string contains only sep's */

  tmp = pos;                    /* now, keep position of the token */

  while (*pos && strchr(fs, *pos) == NULL)
    pos++;                      /* skip content of the token */

  if (*pos)
    *pos++ = '\0';              /* remove first sep after the token */
  else
    pos = NULL;                 /* end of string */

  *save = pos;
  return tmp;
}
/*
 * NOTE(review): this region is a numbered listing with lines missing (most
 * of the body of canonize() is not visible), so the text is kept verbatim.
 *
 * What the visible statements show:
 *  - cbuf is a BUFSIZE-byte scratch buffer with static storage duration, so
 *    whatever is written to it persists across calls and is shared by all
 *    callers.
 *  - The outer loop walks the comma-separated items of buffer with
 *    ircd_strtok(); the inner loop walks the items of cbuf the same way
 *    and compares each with the current item via ircd_strcmp(), i.e. the
 *    duplicate test is case-insensitive.
 *  - ircd_strtok() overwrites separators with NUL, so both buffer and cbuf
 *    are modified while being scanned; the code that is presumably
 *    responsible for putting the commas back is not visible here.
 *
 * NOTE(review): the comment below says the input buffer is returned, but a
 * static result buffer is declared; confirm which pointer the function
 * actually returns and correct the @return text if it is cbuf.
 * NOTE(review): nothing visible bounds the amount copied into cbuf;
 * confirm callers never pass more than BUFSIZE - 1 characters.
 */
161 /** Rewrite a comma-delimited list of items to remove duplicates.
162 * @param[in,out] buffer Comma-delimited list.
163 * @return The input buffer \a buffer.
165 char* canonize(char* buffer)
167 static char cbuf[BUFSIZE];
177 for (s = ircd_strtok(&p, buffer, ","); s; s = ircd_strtok(&p, NULL, ","))
182 for (t = ircd_strtok(&p2, cbuf, ","); t; t = ircd_strtok(&p2, NULL, ","))
183 if (0 == ircd_strcmp(s, t))
/** Copy one string to another, not to exceed a certain length.
 * Copying stops after the terminating NUL of \a s2 has been written or
 * after \a n bytes, whichever comes first.  Unlike strncpy() the rest
 * of \a s1 is not zero-padded.  If \a s2 holds \a n or more characters
 * the result is NOT NUL-terminated; callers that need a terminator must
 * reserve one extra byte and store it themselves.
 * @param[out] s1 Output buffer (at least \a n bytes).
 * @param[in] s2 Source buffer.
 * @param[in] n Maximum number of bytes to write.
 * @return The original output buffer \a s1.
 */
char* ircd_strncpy(char* s1, const char* s2, size_t n)
{
  char* endp = s1 + n;
  char* s = s1;

  assert(0 != s1);
  assert(0 != s2);

  while (s < endp && (*s++ = *s2++))
    ;
  return s1;
}
/*
 * NOTE(review): this region is a numbered listing with lines missing; the
 * conditional that the #endif below closes is not visible.  Judging by the
 * trailing comment on that #endif, the two lines are compiled only when
 * FORCEINLINE is not defined.  NTL_HDR_* and NTL_SRC_* are macros defined
 * elsewhere; each line presumably expands to a function header followed by
 * a braced body, giving out-of-line strChattr and strCasediff -- confirm
 * against the header that defines them.
 */
227 NTL_HDR_strChattr { NTL_SRC_strChattr }
228 NTL_HDR_strCasediff { NTL_SRC_strCasediff }
229 #endif /* !FORCEINLINE */
/*
 * Other functions visible externally
 */
235 /** Find common character attributes for the start of a string.
236 * @param[in] s Input string to scan.
237 * @param[in] n Maximum number of bytes to check.
238 * @return Bitmask of all character attributes shared by the start of \a s.
240 int strnChattr(const char *s, const size_t n)
246 x &= IRCD_CharAttrTab[*rs++ - CHAR_MIN];
/** Case insensitive string comparison.
 * Characters are folded with ToLower() before being compared.
 * @param[in] a First string to compare.
 * @param[in] b Second string to compare.
 * @return Zero if the strings are equal ignoring case; otherwise the
 * difference between the (unfolded) characters at the first position
 * where the strings differ.
 */
int ircd_strcmp(const char *a, const char *b)
{
  const char* ra = a;
  const char* rb = b;

  while (ToLower(*ra) == ToLower(*rb)) {
    if (!*ra++)
      return 0;                 /* both strings ended together */
    else
      ++rb;
  }
  return (*ra - *rb);
}
/** Case insensitive comparison of the starts of two strings.
 * Characters are folded with ToLower() before being compared.
 * @param[in] a First string to compare.
 * @param[in] b Second string to compare.
 * @param[in] n Maximum number of characters to compare.
 * @return Zero if the first \a n characters are equal ignoring case (or
 * \a n is zero); otherwise the difference between the (unfolded)
 * characters at the first position where the strings differ.
 */
int ircd_strncmp(const char *a, const char *b, size_t n)
{
  const char* ra = a;
  const char* rb = b;
  size_t      left = n;

  if (!left--)
    return 0;
  while (ToLower(*ra) == ToLower(*rb)) {
    /* Equal so far: stop at the end of the strings or once n
     * characters have been compared. */
    if (!*ra++ || !left--)
      return 0;
    else
      ++rb;
  }
  return (*ra - *rb);
}
/** Fill a vector of distinct names from a delimited input list.
 * Empty tokens (when \a token occurs at the start or end of \a names,
 * or when \a token occurs adjacent to itself) are ignored.  When
 * \a size tokens have been written to \a vector, the rest of the
 * input is ignored.
 * Unlike token_vector(), if a token repeats an earlier token (compared
 * case-insensitively with ircd_strcmp()), it is skipped.
 * The input buffer is modified: delimiters are overwritten with NULs
 * and the entries of \a vector point into it.
 * @param[in,out] names Input buffer.
 * @param[in] token Delimiter used to split \a names.
 * @param[out] vector Output vector.
 * @param[in] size Maximum number of elements to put in \a vector.
 * @return Number of elements written to \a vector.
 */
int unique_name_vector(char* names, char token, char** vector, int size)
{
  int   i;
  int   count = 0;
  char* start = names;
  char* end;

  assert(0 != names);
  assert(0 != vector);
  assert(0 < size);

  /*
   * ignore spurious tokens
   */
  while (token == *start)
    ++start;

  for (end = strchr(start, token); end; end = strchr(start, token)) {
    *end++ = '\0';
    /*
     * ignore spurious tokens
     */
    while (token == *end)
      ++end;
    for (i = 0; i < count; ++i) {
      if (0 == ircd_strcmp(vector[i], start))
        break;
    }
    if (i == count) {
      /* not seen before */
      vector[count++] = start;
      if (count == size)
        return count;
    }
    start = end;
  }
  /* Whatever follows the last delimiter is the final token. */
  if (*start) {
    for (i = 0; i < count; ++i)
      if (0 == ircd_strcmp(vector[i], start))
        return count;
    vector[count++] = start;
  }
  return count;
}
/** Fill a vector of tokens from a delimited input list.
 * The first token is always stored, even if it is empty.  Splitting
 * stops when \a size tokens have been written (the last stored token
 * then keeps the unsplit remainder of the input) or when a delimiter
 * is immediately followed by the end of the string.  Empty tokens in
 * the middle of the list are stored as empty strings.
 * The input buffer is modified: delimiters are overwritten with NULs
 * and the entries of \a vector point into it.
 * @param[in,out] names Input buffer.
 * @param[in] token Delimiter used to split \a names.
 * @param[out] vector Output vector.
 * @param[in] size Maximum number of elements to put in \a vector.
 * @return Number of elements written to \a vector.
 */
int token_vector(char* names, char token, char** vector, int size)
{
  int   count = 0;
  char* start = names;
  char* end;

  assert(0 != names);
  assert(0 != vector);
  assert(0 < size);

  vector[count++] = start;
  /* Checking the capacity before every split keeps a one-element
   * vector from being overrun. */
  while (count < size && (end = strchr(start, token)) != NULL) {
    *end++ = '\0';
    start = end;
    if ('\0' == *start)
      break;                    /* trailing delimiter: nothing follows */
    vector[count++] = start;
  }
  return count;
}
/** Copy all or part of the hostname in a string to another string.
 * If \a userhost contains an '\@', the portion after the first '\@' is
 * used; otherwise, the whole \a userhost is used.  The result is always
 * NUL-terminated at \a buf[\a len], so \a buf must have room for
 * \a len + 1 bytes.
 * @param[out] buf Output buffer (at least \a len + 1 bytes).
 * @param[in] userhost user\@hostname or hostname string.
 * @param[in] len Maximum number of characters to copy to \a buf.
 * @return The output buffer \a buf.
 */
char* host_from_uh(char* buf, const char* userhost, size_t len)
{
  const char* s;

  assert(0 != buf);
  assert(0 != userhost);

  if ((s = strchr(userhost, '@')))
    ++s;
  else
    s = userhost;
  ircd_strncpy(buf, s, len);
  /* ircd_strncpy() does not terminate a truncated copy. */
  buf[len] = '\0';
  return buf;
}
/*
 * this new faster inet_ntoa was ripped from:
 * From: Thomas Helvey <tomh@inxpress.net>
 */
/** Array of text strings for dotted quads.
 * Entry N is the decimal text of the byte value N (0..255).
 */
static const char* IpQuadTab[] =
{
    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",
   "10",  "11",  "12",  "13",  "14",  "15",  "16",  "17",  "18",  "19",
   "20",  "21",  "22",  "23",  "24",  "25",  "26",  "27",  "28",  "29",
   "30",  "31",  "32",  "33",  "34",  "35",  "36",  "37",  "38",  "39",
   "40",  "41",  "42",  "43",  "44",  "45",  "46",  "47",  "48",  "49",
   "50",  "51",  "52",  "53",  "54",  "55",  "56",  "57",  "58",  "59",
   "60",  "61",  "62",  "63",  "64",  "65",  "66",  "67",  "68",  "69",
   "70",  "71",  "72",  "73",  "74",  "75",  "76",  "77",  "78",  "79",
   "80",  "81",  "82",  "83",  "84",  "85",  "86",  "87",  "88",  "89",
   "90",  "91",  "92",  "93",  "94",  "95",  "96",  "97",  "98",  "99",
  "100", "101", "102", "103", "104", "105", "106", "107", "108", "109",
  "110", "111", "112", "113", "114", "115", "116", "117", "118", "119",
  "120", "121", "122", "123", "124", "125", "126", "127", "128", "129",
  "130", "131", "132", "133", "134", "135", "136", "137", "138", "139",
  "140", "141", "142", "143", "144", "145", "146", "147", "148", "149",
  "150", "151", "152", "153", "154", "155", "156", "157", "158", "159",
  "160", "161", "162", "163", "164", "165", "166", "167", "168", "169",
  "170", "171", "172", "173", "174", "175", "176", "177", "178", "179",
  "180", "181", "182", "183", "184", "185", "186", "187", "188", "189",
  "190", "191", "192", "193", "194", "195", "196", "197", "198", "199",
  "200", "201", "202", "203", "204", "205", "206", "207", "208", "209",
  "210", "211", "212", "213", "214", "215", "216", "217", "218", "219",
  "220", "221", "222", "223", "224", "225", "226", "227", "228", "229",
  "230", "231", "232", "233", "234", "235", "236", "237", "238", "239",
  "240", "241", "242", "243", "244", "245", "246", "247", "248", "249",
  "250", "251", "252", "253", "254", "255"
};
442 /** Convert an IP address to printable ASCII form.
443 * This is generally deprecated in favor of ircd_ntoa_r().
444 * @param[in] in Address to convert.
445 * @return Pointer to a static buffer containing the readable form.
447 const char* ircd_ntoa(const struct irc_in_addr* in)
449 static char buf[SOCKIPLEN];
450 return ircd_ntoa_r(buf, in);
453 /* This doesn't really belong here, but otherwise umkpasswd breaks. */
454 /** Check whether an IP address looks like an IPv4 address.
455 * @param[in] addr Address to check.
456 * @return Non-zero if the address is a valid IPv4 address, zero if not.
458 int irc_in_addr_is_ipv4(const struct irc_in_addr *addr)
460 return addr->in6_16[0] == 0
461 && addr->in6_16[1] == 0
462 && addr->in6_16[2] == 0
463 && addr->in6_16[3] == 0
464 && addr->in6_16[4] == 0
465 && (addr->in6_16[5] == 0 || addr->in6_16[5] == 0xffff)
466 && addr->in6_16[6] != 0;
/*
 * NOTE(review): this region is a numbered listing with lines missing
 * (braces, several statements and comment delimiters are not visible), so
 * the text of ircd_ntoa_r() is kept verbatim instead of being rewritten.
 *
 * What the visible statements show:
 *  - IPv4 branch (taken when irc_in_addr_is_ipv4(in) is true): pch points
 *    at word 6 of the address and is advanced one byte at a time over the
 *    four bytes of words 6 and 7.  Each byte indexes IpQuadTab to obtain
 *    its decimal text, which is copied to buf + pos with memcpy(); the
 *    string is terminated at buf[pos + len].  The statements that advance
 *    pos and write the '.' separators are not visible in this excerpt.
 *  - Other branch: the loop labelled "Find longest run of zeros" starts
 *    at word 1 (max_start = ii = 1), so word 0 is never part of the run
 *    that gets compressed.  A run that is still open when the loop ends
 *    is handled by the `if (curr_zeros > max_zeros)` test after the loop.
 *  - APPEND(CH) stores one character at buf[pos] and increments pos; it
 *    performs no bounds check, so the caller must supply a buffer large
 *    enough for the longest textual form.
 *  - In the print loop each word is converted with ntohs() and emitted
 *    most significant hex digit first.  The conditions guarding the first
 *    three digits are not visible; presumably they suppress leading
 *    zeros -- confirm.
 *
 * NOTE(review): no statement that resets curr_zeros is visible.  Confirm
 * that it is cleared on every non-zero word and not only when a new
 * maximum is recorded; otherwise a short run followed later by another
 * run is over-counted and max_start can point at a non-zero word.
 * NOTE(review): `max_zeros + max_start == 8` holds when the compressed run
 * extends through word 7.  The statement it controls is not visible;
 * confirm the output for such addresses ends in exactly "::".
 */
469 /** Convert an IP address to printable ASCII form.
470 * @param[out] buf Output buffer to write to.
471 * @param[in] in Address to format.
472 * @return Pointer to the output buffer \a buf.
474 const char* ircd_ntoa_r(char* buf, const struct irc_in_addr* in)
479 if (irc_in_addr_is_ipv4(in)) {
480 unsigned int pos, len;
483 pch = (unsigned char*)&in->in6_16[6];
484 len = strlen(IpQuadTab[*pch]);
485 memcpy(buf, IpQuadTab[*pch++], len);
488 len = strlen(IpQuadTab[*pch]);
489 memcpy(buf+pos, IpQuadTab[*pch++], len);
492 len = strlen(IpQuadTab[*pch]);
493 memcpy(buf+pos, IpQuadTab[*pch++], len);
496 len = strlen(IpQuadTab[*pch]);
497 memcpy(buf+pos, IpQuadTab[*pch++], len);
498 buf[pos + len] = '\0';
501 static const char hexdigits[] = "0123456789abcdef";
502 unsigned int pos, part, max_start, max_zeros, curr_zeros, ii;
504 /* Find longest run of zeros. */
505 for (max_start = ii = 1, max_zeros = curr_zeros = 0; ii < 8; ++ii) {
508 else if (curr_zeros > max_zeros) {
509 max_start = ii - curr_zeros;
510 max_zeros = curr_zeros;
514 if (curr_zeros > max_zeros) {
515 max_start = ii - curr_zeros;
516 max_zeros = curr_zeros;
519 /* Print out address. */
520 /** Append \a CH to the output buffer. */
521 #define APPEND(CH) do { buf[pos++] = (CH); } while (0)
522 for (pos = ii = 0; (ii < 8); ++ii) {
523 if ((max_zeros > 0) && (ii == max_start)) {
528 part = ntohs(in->in6_16[ii]);
530 APPEND(hexdigits[part >> 12]);
532 APPEND(hexdigits[(part >> 8) & 15]);
534 APPEND(hexdigits[(part >> 4) & 15]);
535 APPEND(hexdigits[part & 15]);
539 if (max_zeros + max_start == 8)
543 /* Nul terminate and return number of characters used. */
/** Attempt to parse an IPv4 address into a network-endian form.
 * Exactly four decimal components separated by single dots are
 * accepted; each component must be in the range 0..255.  Parsing
 * stops at the first non-digit after the fourth component, so the
 * address may be followed by other text.
 * @param[in] input Input string.
 * @param[out] output Network-endian representation of the address
 * (written only on success).
 * @return Number of characters used from \a input, or 0 if the parse failed.
 */
static unsigned int
ircd_aton_ip4(const char *input, unsigned int *output)
{
  unsigned int dots = 0, pos = 0, part = 0, ip = 0;

  /* Intentionally no support for bizarre IPv4 formats (plain
   * integers, octal or hex components) -- only vanilla dotted
   * decimal quads.
   */
  if (input[0] == '.')
    return 0;
  while (1) {
    if (IsDigit(input[pos])) {
      part = part * 10 + input[pos++] - '0';
      if (part > 255)
        return 0;
      /* The fourth component ends at the first non-digit. */
      if ((dots == 3) && !IsDigit(input[pos])) {
        *output = htonl(ip | part);
        return pos;
      }
    } else if (input[pos] == '.') {
      /* An empty component ("..") is malformed. */
      if (input[++pos] == '.')
        return 0;
      ip |= part << (24 - 8 * dots++);
      part = 0;
    } else
      return 0;
  }
}
/*
 * NOTE(review): this region is a numbered listing with lines missing (the
 * return type, braces, most `case` labels, the gotos/breaks and several
 * returns are not visible), so the text of ircd_aton() is kept verbatim
 * instead of being rewritten.
 *
 * What the visible statements show:
 *  - *ip is cleared with memset() before any parsing, so the previous
 *    contents are lost even when the parse fails.
 *  - The IPv6 parser is used when the input contains a ':' and either has
 *    no '.' or its first '.' comes after its first ':'.
 *  - Inside that branch an unsigned `colon` (initialised to 8) is
 *    declared; it shadows the outer pointer of the same name.  It is used
 *    as the word index of the "::" gap in the shift/fill loops; 8
 *    presumably means "no gap seen" -- confirm.
 *  - A leading ':' is tested for being followed by a second ':' but not
 *    a third; the statement controlled by that test is not visible.
 *  - Hex digits are accumulated with part = (part << 4) | chval.
 *  - Where part_start is set to the character after a ':', a '.' in that
 *    position is tested for, and the accumulated part is stored in the
 *    next word with htons().
 *  - An embedded dotted quad is parsed by ircd_aton_ip4(); it is rejected
 *    when that returns 0 or when more than six words are already filled,
 *    and is stored as two words (high half, then low half).
 *  - "Shift stuff after ::": the words stored after the gap are moved to
 *    the end of the address (word 7 downwards) and the words from the gap
 *    position onwards are zero-filled.
 *  - In the plain IPv4 branch the parsed address is stored in words 6 and
 *    7; words 0..5 stay zero from the initial memset().
 *
 * NOTE(review): ircd_aton_ip4() is called with input + pos while
 * part_start is assigned but never visibly read.  Confirm that pos refers
 * to the first digit of the dotted quad at that point (hex-digit parsing
 * may already have advanced past the first component).
 */
584 /** Parse a numeric IPv4 or IPv6 address into an irc_in_addr.
585 * @param[out] ip Receives parsed IP address.
586 * @param[in] input Input buffer.
587 * @return Number of characters used from \a input, or 0 if the
588 * address was unparseable or malformed.
591 ircd_aton(struct irc_in_addr *ip, const char *input)
598 memset(ip, 0, sizeof(*ip));
599 colon = strchr(input, ':');
600 dot = strchr(input, '.');
602 if (colon && (!dot || (dot > colon))) {
603 unsigned int part = 0, pos = 0, ii = 0, colon = 8;
604 const char *part_start = NULL;
606 /* Parse IPv6, possibly like ::127.0.0.1.
607 * This is pretty straightforward; the only trick is borrowed
608 * from Paul Vixie (BIND): when it sees a "::" continue as if
609 * it were a single ":", but note where it happened, and fill
610 * with zeros afterwards.
612 if (input[pos] == ':') {
613 if ((input[pos+1] != ':') || (input[pos+2] == ':'))
621 switch (input[pos]) {
622 case '0': case '1': case '2': case '3': case '4':
623 case '5': case '6': case '7': case '8': case '9':
624 chval = input[pos] - '0';
626 part = (part << 4) | chval;
631 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
632 chval = input[pos] - 'A' + 10;
634 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
635 chval = input[pos] - 'a' + 10;
638 part_start = input + ++pos;
639 if (input[pos] == '.')
641 ip->in6_16[ii++] = htons(part);
643 if (input[pos] == ':') {
653 len = ircd_aton_ip4(input + pos, &ip4);
654 if (!len || (ii > 6))
656 ip->in6_16[ii++] = htons(ntohl(ip4) >> 16);
657 ip->in6_16[ii++] = htons(ntohl(ip4) & 65535);
665 /* Shift stuff after "::" up and fill middle with zeros. */
666 ip->in6_16[ii++] = htons(part);
667 for (jj = 0; jj < ii - colon; jj++)
668 ip->in6_16[7 - jj] = ip->in6_16[ii - jj - 1];
669 for (jj = 0; jj < 8 - ii; jj++)
670 ip->in6_16[colon + jj] = 0;
678 int len = ircd_aton_ip4(input, &addr);
680 ip->in6_16[6] = htons(ntohl(addr) >> 16);
681 ip->in6_16[7] = htons(ntohl(addr) & 65535);
685 return 0; /* parse failed */