3 /* Copyright (C) 1995, 1996 Tom Lord
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Library General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public License
16 * along with this software; see the file COPYING. If not, write to
17 * the Free Software Foundation, 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
27 /* The functions in the next several pages define the lazy-NFA-conversion used
28 * by matchers. The input to this construction is an NFA such as
29 * is built by compactify_nfa (rx.c). The output is the superNFA.
32 /* Match engines can use arbitrary values for opcodes. So, the parse tree
33 * is built using instructions names (enum rx_opcode), but the superstate
34 * nfa is populated with mystery opcodes (void *).
36 * For convenience, here is an id table. The opcodes are == to their inxs
38 * The lables in re_search_2 would make good values for instructions.
41 void * rx_id_instruction_table[rx_num_instructions] =
43 (void *) rx_backtrack_point,
44 (void *) rx_do_side_effects,
45 (void *) rx_cache_miss,
46 (void *) rx_next_char,
47 (void *) rx_backtrack,
54 /* The partially instantiated superstate graph has a transition
55 * table at every node. There is one entry for every character.
56 * This fills in the transition for a set.
60 install_transition (struct rx_superstate *super,
61 struct rx_inx *answer, rx_Bitset trcset)
64 install_transition (super, answer, trcset)
65 struct rx_superstate *super;
66 struct rx_inx *answer;
70 struct rx_inx * transitions = super->transitions;
72 for (chr = 0; chr < 256; )
80 RX_subset sub = *trcset;
86 transitions [chr] = *answer;
97 qlen (struct rx_superstate * q)
101 struct rx_superstate * q;
105 struct rx_superstate * it;
108 for (it = q->next_recyclable; it != q; it = it->next_recyclable)
115 check_cache (struct rx_cache * cache)
119 struct rx_cache * cache;
122 struct rx_cache * you_fucked_up = 0;
123 int total = cache->superstates;
124 int semi = cache->semifree_superstates;
125 if (semi != qlen (cache->semifree_superstate))
126 check_cache (you_fucked_up);
127 if ((total - semi) != qlen (cache->lru_superstate))
128 check_cache (you_fucked_up);
132 rx_cache_malloc (struct rx_cache * cache, int size)
135 rx_cache_malloc (cache, size)
136 struct rx_cache * cache;
141 answer = (char *)malloc (size);
143 cache->bytes_used += size;
150 rx_cache_free (struct rx_cache * cache, int size, char * mem)
153 rx_cache_free (cache, size, mem)
154 struct rx_cache * cache;
160 cache->bytes_used -= size;
165 /* When a superstate is old and neglected, it can enter a
166 * semi-free state. A semi-free state is slated to die.
167 * Incoming transitions to a semi-free state are re-written
168 * to cause an (interpreted) fault when they are taken.
169 * The fault handler revives the semi-free state, patches
170 * incoming transitions back to normal, and continues.
172 * The idea is basicly to free in two stages, aborting
173 * between the two if the state turns out to be useful again.
174 * When a free is aborted, the rescued superstate is placed
175 * in the most-favored slot to maximize the time until it
176 * is next semi-freed.
178 * Overall, this approximates truly freeing superstates in
179 * lru order, but does not involve the cost of updating perfectly
180 * accurate timestamps every time a superstate is touched.
185 semifree_superstate (struct rx_cache * cache)
188 semifree_superstate (cache)
189 struct rx_cache * cache;
192 int disqualified = cache->semifree_superstates;
193 if (disqualified == cache->superstates)
195 while (cache->lru_superstate->locks)
197 cache->lru_superstate = cache->lru_superstate->next_recyclable;
199 if (disqualified == cache->superstates)
203 struct rx_superstate * it = cache->lru_superstate;
204 it->next_recyclable->prev_recyclable = it->prev_recyclable;
205 it->prev_recyclable->next_recyclable = it->next_recyclable;
206 cache->lru_superstate = (it == it->next_recyclable
208 : it->next_recyclable);
209 if (!cache->semifree_superstate)
211 cache->semifree_superstate = it;
212 it->next_recyclable = it;
213 it->prev_recyclable = it;
217 it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
218 it->next_recyclable = cache->semifree_superstate;
219 it->prev_recyclable->next_recyclable = it;
220 it->next_recyclable->prev_recyclable = it;
223 struct rx_distinct_future *df;
225 ++cache->semifree_superstates;
226 df = it->transition_refs;
229 df->prev_same_dest->next_same_dest = 0;
230 for (df = it->transition_refs; df; df = df->next_same_dest)
232 df->future_frame.inx = cache->instruction_table[rx_cache_miss];
233 df->future_frame.data = 0;
234 df->future_frame.data_2 = (void *) df;
235 /* If there are any NEXT-CHAR instruction frames that
236 * refer to this state, we convert them to CACHE-MISS frames.
239 && (df->edge->options->next_same_super_edge[0]
240 == df->edge->options))
241 install_transition (df->present, &df->future_frame,
244 df = it->transition_refs;
245 df->prev_same_dest->next_same_dest = df;
254 refresh_semifree_superstate (struct rx_cache * cache,
255 struct rx_superstate * super)
258 refresh_semifree_superstate (cache, super)
259 struct rx_cache * cache;
260 struct rx_superstate * super;
263 struct rx_distinct_future *df;
265 if (super->transition_refs)
267 super->transition_refs->prev_same_dest->next_same_dest = 0;
268 for (df = super->transition_refs; df; df = df->next_same_dest)
270 df->future_frame.inx = cache->instruction_table[rx_next_char];
271 df->future_frame.data = (void *) super->transitions;
272 df->future_frame.data_2 = (void *)(super->contents->is_final);
273 /* CACHE-MISS instruction frames that refer to this state,
274 * must be converted to NEXT-CHAR frames.
277 && (df->edge->options->next_same_super_edge[0]
278 == df->edge->options))
279 install_transition (df->present, &df->future_frame,
282 super->transition_refs->prev_same_dest->next_same_dest
283 = super->transition_refs;
285 if (cache->semifree_superstate == super)
286 cache->semifree_superstate = (super->prev_recyclable == super
288 : super->prev_recyclable);
289 super->next_recyclable->prev_recyclable = super->prev_recyclable;
290 super->prev_recyclable->next_recyclable = super->next_recyclable;
292 if (!cache->lru_superstate)
293 (cache->lru_superstate
294 = super->next_recyclable
295 = super->prev_recyclable
299 super->next_recyclable = cache->lru_superstate;
300 super->prev_recyclable = cache->lru_superstate->prev_recyclable;
301 super->next_recyclable->prev_recyclable = super;
302 super->prev_recyclable->next_recyclable = super;
304 super->is_semifree = 0;
305 --cache->semifree_superstates;
310 rx_refresh_this_superstate (struct rx_cache * cache,
311 struct rx_superstate * superstate)
314 rx_refresh_this_superstate (cache, superstate)
315 struct rx_cache * cache;
316 struct rx_superstate * superstate;
319 if (superstate->is_semifree)
320 refresh_semifree_superstate (cache, superstate);
321 else if (cache->lru_superstate == superstate)
322 cache->lru_superstate = superstate->next_recyclable;
323 else if (superstate != cache->lru_superstate->prev_recyclable)
325 superstate->next_recyclable->prev_recyclable
326 = superstate->prev_recyclable;
327 superstate->prev_recyclable->next_recyclable
328 = superstate->next_recyclable;
329 superstate->next_recyclable = cache->lru_superstate;
330 superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
331 superstate->next_recyclable->prev_recyclable = superstate;
332 superstate->prev_recyclable->next_recyclable = superstate;
338 release_superset_low (struct rx_cache * cache,
339 struct rx_superset *set)
342 release_superset_low (cache, set)
343 struct rx_cache * cache;
344 struct rx_superset *set;
350 set->starts_for->start_set = 0;
353 release_superset_low (cache, set->cdr);
355 rx_hash_free (&set->hash_item, &cache->superset_hash_rules);
356 rx_cache_free (cache,
357 sizeof (struct rx_superset),
364 rx_release_superset (struct rx *rx,
365 struct rx_superset *set)
368 rx_release_superset (rx, set)
370 struct rx_superset *set;
373 release_superset_low (rx->cache, set);
376 /* This tries to add a new superstate to the superstate freelist.
377 * It might, as a result, free some edge pieces or hash tables.
378 * If nothing can be freed because too many locks are being held, fail.
383 rx_really_free_superstate (struct rx_cache * cache)
386 rx_really_free_superstate (cache)
387 struct rx_cache * cache;
390 int locked_superstates = 0;
391 struct rx_superstate * it;
393 if (!cache->superstates)
397 while ((cache->hits + cache->misses) > cache->superstates)
403 /* semifree superstates faster than they are freed
404 * so popular states can be rescued.
406 semifree_superstate (cache);
407 semifree_superstate (cache);
408 semifree_superstate (cache);
411 Redundant because semifree_superstate already
415 while (cache->semifree_superstate && cache->semifree_superstate->locks)
417 refresh_semifree_superstate (cache, cache->semifree_superstate);
418 ++locked_superstates;
419 if (locked_superstates == cache->superstates)
424 if (cache->semifree_superstate)
425 insert the "it =" block which follows this "if 0" code;
428 while (cache->lru_superstate->locks)
430 cache->lru_superstate = cache->lru_superstate->next_recyclable;
431 ++locked_superstates;
432 if (locked_superstates == cache->superstates)
435 it = cache->lru_superstate;
436 it->next_recyclable->prev_recyclable = it->prev_recyclable;
437 it->prev_recyclable->next_recyclable = it->next_recyclable;
438 cache->lru_superstate = ((it == it->next_recyclable)
440 : it->next_recyclable);
444 if (!cache->semifree_superstate)
448 it = cache->semifree_superstate;
449 it->next_recyclable->prev_recyclable = it->prev_recyclable;
450 it->prev_recyclable->next_recyclable = it->next_recyclable;
451 cache->semifree_superstate = ((it == it->next_recyclable)
453 : it->next_recyclable);
454 --cache->semifree_superstates;
458 if (it->transition_refs)
460 struct rx_distinct_future *df;
461 for (df = it->transition_refs,
462 df->prev_same_dest->next_same_dest = 0;
464 df = df->next_same_dest)
466 df->future_frame.inx = cache->instruction_table[rx_cache_miss];
467 df->future_frame.data = 0;
468 df->future_frame.data_2 = (void *) df;
471 it->transition_refs->prev_same_dest->next_same_dest =
475 struct rx_super_edge *tc = it->edges;
478 struct rx_distinct_future * df;
479 struct rx_super_edge *tct = tc->next;
481 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
484 struct rx_distinct_future *dft = df;
485 df = df->next_same_super_edge[0];
488 if (dft->future && dft->future->transition_refs == dft)
490 dft->future->transition_refs = dft->next_same_dest;
491 if (dft->future->transition_refs == dft)
492 dft->future->transition_refs = 0;
494 dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
495 dft->prev_same_dest->next_same_dest = dft->next_same_dest;
496 rx_cache_free (cache,
497 sizeof (struct rx_distinct_future),
500 rx_cache_free (cache,
501 sizeof (struct rx_super_edge),
507 if (it->contents->superstate == it)
508 it->contents->superstate = 0;
509 release_superset_low (cache, it->contents);
510 rx_cache_free (cache,
511 (sizeof (struct rx_superstate)
512 + cache->local_cset_size * sizeof (struct rx_inx)),
514 --cache->superstates;
521 rx_cache_malloc_or_get (struct rx_cache * cache, int size)
524 rx_cache_malloc_or_get (cache, size)
525 struct rx_cache * cache;
529 while ( (cache->bytes_used + size > cache->bytes_allowed)
530 && rx_really_free_superstate (cache))
533 return rx_cache_malloc (cache, size);
540 supersetcmp (void * va, void * vb)
548 struct rx_superset * a = (struct rx_superset *)va;
549 struct rx_superset * b = (struct rx_superset *)vb;
551 || (a && b && (a->id == b->id) && (a->car == b->car) && (a->cdr == b->cdr)));
554 #define rx_abs(A) (((A) > 0) ? (A) : -(A))
558 static struct rx_hash_item *
559 superset_allocator (struct rx_hash_rules * rules, void * val)
561 static struct rx_hash_item *
562 superset_allocator (rules, val)
563 struct rx_hash_rules * rules;
567 struct rx_cache * cache;
568 struct rx_superset * template;
569 struct rx_superset * newset;
571 cache = ((struct rx_cache *)
573 - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
574 template = (struct rx_superset *)val;
575 newset = ((struct rx_superset *)
576 rx_cache_malloc (cache, sizeof (*template)));
583 cdrfinal = (template->cdr
584 ? template->cdr->is_final
586 cdredges = (template->cdr
587 ? template->cdr->has_cset_edges
590 newset->is_final = (rx_abs (template->car->is_final) > rx_abs(cdrfinal)
591 ? template->car->is_final
592 : template->cdr->is_final);
593 newset->has_cset_edges = (template->car->has_cset_edges || cdredges);
596 newset->id = template->id;
597 newset->car = template->car;
598 newset->cdr = template->cdr;
599 rx_protect_superset (rx, template->cdr);
600 newset->superstate = 0;
601 newset->starts_for = 0;
602 newset->hash_item.data = (void *)newset;
603 newset->hash_item.binding = 0;
604 return &newset->hash_item;
608 static struct rx_hash *
609 super_hash_allocator (struct rx_hash_rules * rules)
611 static struct rx_hash *
612 super_hash_allocator (rules)
613 struct rx_hash_rules * rules;
616 struct rx_cache * cache;
617 cache = ((struct rx_cache *)
619 - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
620 return ((struct rx_hash *)
621 rx_cache_malloc (cache, sizeof (struct rx_hash)));
627 super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
630 super_hash_liberator (hash, rules)
631 struct rx_hash * hash;
632 struct rx_hash_rules * rules;
635 struct rx_cache * cache
636 = ((struct rx_cache *)
637 (char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules));
638 rx_cache_free (cache, sizeof (struct rx_hash), (char *)hash);
643 superset_hash_item_liberator (struct rx_hash_item * it,
644 struct rx_hash_rules * rules)
647 superset_hash_item_liberator (it, rules)
648 struct rx_hash_item * it;
649 struct rx_hash_rules * rules;
654 int rx_cache_bound = 3;
655 static int rx_default_cache_got = 0;
657 static struct rx_cache default_cache =
661 super_hash_allocator,
662 super_hash_liberator,
664 superset_hash_item_liberator,
673 RX_DEFAULT_DFA_CACHE_SIZE,
676 rx_id_instruction_table,
685 struct rx_cache * rx_default_cache = &default_cache;
687 /* This adds an element to a superstate set. These sets are lists, such
688 * that lists with == elements are ==. The empty set is returned by
689 * superset_cons (rx, 0, 0) and is NOT equivelent to
690 * (struct rx_superset)0.
695 rx_superset_cons (struct rx * rx,
696 struct rx_nfa_state *car, struct rx_superset *cdr)
699 rx_superset_cons (rx, car, cdr)
701 struct rx_nfa_state *car;
702 struct rx_superset *cdr;
705 struct rx_cache * cache = rx->cache;
708 if (!cache->empty_superset)
710 cache->empty_superset
711 = ((struct rx_superset *)
712 rx_cache_malloc (cache, sizeof (struct rx_superset)));
713 if (!cache->empty_superset)
715 rx_bzero ((char *)cache->empty_superset, sizeof (struct rx_superset));
716 cache->empty_superset->refs = 1000;
718 return cache->empty_superset;
721 struct rx_superset template;
722 struct rx_hash_item * hit;
725 template.id = rx->rx_id;
726 rx_protect_superset (rx, template.cdr);
727 hit = rx_hash_store (&cache->superset_table,
728 (unsigned long)car ^ car->id ^ (unsigned long)cdr,
730 &cache->superset_hash_rules);
731 rx_protect_superset (rx, template.cdr);
733 ? (struct rx_superset *)hit->data
738 /* This computes a union of two NFA state sets. The sets do not have the
739 * same representation though. One is a RX_SUPERSET structure (part
740 * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
745 rx_superstate_eclosure_union (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
748 rx_superstate_eclosure_union (rx, set, ecl)
750 struct rx_superset *set;
751 struct rx_nfa_state_set *ecl;
758 return rx_superset_cons (rx, ecl->car,
759 rx_superstate_eclosure_union (rx, set, ecl->cdr));
760 if (set->car == ecl->car)
761 return rx_superstate_eclosure_union (rx, set, ecl->cdr);
764 struct rx_superset * tail;
765 struct rx_nfa_state * first;
767 if (set->car->id < ecl->car->id)
769 tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
774 tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
781 struct rx_superset * answer;
782 answer = rx_superset_cons (rx, first, tail);
785 rx_protect_superset (rx, tail);
786 rx_release_superset (rx, tail);
799 * This makes sure that a list of rx_distinct_futures contains
800 * a future for each possible set of side effects in the eclosure
801 * of a given state. This is some of the work of filling in a
802 * superstate transition.
806 static struct rx_distinct_future *
807 include_futures (struct rx *rx,
808 struct rx_distinct_future *df, struct rx_nfa_state
809 *state, struct rx_superstate *superstate)
811 static struct rx_distinct_future *
812 include_futures (rx, df, state, superstate)
814 struct rx_distinct_future *df;
815 struct rx_nfa_state *state;
816 struct rx_superstate *superstate;
819 struct rx_possible_future *future;
820 struct rx_cache * cache = rx->cache;
821 for (future = rx_state_possible_futures (rx, state); future; future = future->next)
823 struct rx_distinct_future *dfp;
824 struct rx_distinct_future *insert_before = 0;
826 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
827 for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
828 if (dfp->effects == future->effects)
832 int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
841 df->next_same_super_edge[1]->next_same_super_edge[0] = df;
845 = ((struct rx_distinct_future *)
846 rx_cache_malloc (cache,
847 sizeof (struct rx_distinct_future)));
852 df = insert_before = dfp;
853 df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
855 else if (!insert_before)
857 else if (insert_before == df)
860 dfp->next_same_super_edge[0] = insert_before;
861 dfp->next_same_super_edge[1]
862 = insert_before->next_same_super_edge[1];
863 dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
864 dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
865 dfp->next_same_dest = dfp->prev_same_dest = dfp;
867 dfp->present = superstate;
868 dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
869 dfp->future_frame.data = 0;
870 dfp->future_frame.data_2 = (void *) dfp;
871 dfp->side_effects_frame.inx
872 = rx->instruction_table[rx_do_side_effects];
873 dfp->side_effects_frame.data = 0;
874 dfp->side_effects_frame.data_2 = (void *) dfp;
875 dfp->effects = future->effects;
883 /* This constructs a new superstate from its state set. The only
884 * complexity here is memory management.
887 struct rx_superstate *
888 rx_superstate (struct rx *rx,
889 struct rx_superset *set)
891 struct rx_superstate *
892 rx_superstate (rx, set)
894 struct rx_superset *set;
897 struct rx_cache * cache = rx->cache;
898 struct rx_superstate * superstate = 0;
900 /* Does the superstate already exist in the cache? */
903 if (set->superstate->rx_id != rx->rx_id)
905 /* Aha. It is in the cache, but belongs to a superstate
906 * that refers to an NFA that no longer exists.
907 * (We know it no longer exists because it was evidently
908 * stored in the same region of memory as the current nfa
909 * yet it has a different id.)
911 superstate = set->superstate;
912 if (!superstate->is_semifree)
914 if (cache->lru_superstate == superstate)
916 cache->lru_superstate = superstate->next_recyclable;
917 if (cache->lru_superstate == superstate)
918 cache->lru_superstate = 0;
921 superstate->next_recyclable->prev_recyclable
922 = superstate->prev_recyclable;
923 superstate->prev_recyclable->next_recyclable
924 = superstate->next_recyclable;
925 if (!cache->semifree_superstate)
927 (cache->semifree_superstate
928 = superstate->next_recyclable
929 = superstate->prev_recyclable
934 superstate->next_recyclable = cache->semifree_superstate;
935 superstate->prev_recyclable
936 = cache->semifree_superstate->prev_recyclable;
937 superstate->next_recyclable->prev_recyclable
939 superstate->prev_recyclable->next_recyclable
941 cache->semifree_superstate = superstate;
943 ++cache->semifree_superstates;
947 goto handle_cache_miss;
950 superstate = set->superstate;
952 rx_refresh_this_superstate (cache, superstate);
958 /* This point reached only for cache misses. */
961 if (rx_debug_trace > 1)
963 struct rx_superset * setp = set;
964 fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
967 fprintf (stderr, "%d ", setp->id);
970 fprintf (stderr, "(%d)\n", set);
976 superstate_size = ( sizeof (*superstate)
977 + ( sizeof (struct rx_inx)
978 * rx->local_cset_size));
979 superstate = ((struct rx_superstate *)
980 rx_cache_malloc_or_get (cache, superstate_size));
981 ++cache->superstates;
987 if (!cache->lru_superstate)
988 (cache->lru_superstate
989 = superstate->next_recyclable
990 = superstate->prev_recyclable
994 superstate->next_recyclable = cache->lru_superstate;
995 superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
996 ( superstate->prev_recyclable->next_recyclable
997 = superstate->next_recyclable->prev_recyclable
1000 superstate->rx_id = rx->rx_id;
1001 superstate->transition_refs = 0;
1002 superstate->locks = 0;
1003 superstate->is_semifree = 0;
1004 set->superstate = superstate;
1005 superstate->contents = set;
1006 rx_protect_superset (rx, set);
1007 superstate->edges = 0;
1010 /* None of the transitions from this superstate are known yet. */
1011 for (x = 0; x < rx->local_cset_size; ++x)
1013 struct rx_inx * ifr = &superstate->transitions[x];
1014 ifr->inx = rx->instruction_table [rx_cache_miss];
1015 ifr->data = ifr->data_2 = 0;
1022 /* This computes the destination set of one edge of the superstate NFA.
1023 * Note that a RX_DISTINCT_FUTURE is a superstate edge.
1024 * Returns 0 on an allocation failure.
1029 solve_destination (struct rx *rx, struct rx_distinct_future *df)
1032 solve_destination (rx, df)
1034 struct rx_distinct_future *df;
1037 struct rx_super_edge *tc = df->edge;
1038 struct rx_superset *nfa_state;
1039 struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
1040 struct rx_superset *solution = nil_set;
1041 struct rx_superstate *dest;
1043 rx_protect_superset (rx, solution);
1044 /* Iterate over all NFA states in the state set of this superstate. */
1045 for (nfa_state = df->present->contents;
1047 nfa_state = nfa_state->cdr)
1049 struct rx_nfa_edge *e;
1050 /* Iterate over all edges of each NFA state. */
1051 for (e = nfa_state->car->edges; e; e = e->next)
1052 /* If we find an edge that is labeled with
1053 * the characters we are solving for.....
1055 if ((e->type == ne_cset)
1056 && rx_bitset_is_subset (rx->local_cset_size,
1060 struct rx_nfa_state *n = e->dest;
1061 struct rx_possible_future *pf;
1062 /* ....search the partial epsilon closures of the destination
1063 * of that edge for a path that involves the same set of
1064 * side effects we are solving for.
1065 * If we find such a RX_POSSIBLE_FUTURE, we add members to the
1066 * stateset we are computing.
1068 for (pf = rx_state_possible_futures (rx, n); pf; pf = pf->next)
1069 if (pf->effects == df->effects)
1071 struct rx_superset * old_sol;
1073 solution = rx_superstate_eclosure_union (rx, solution,
1077 rx_protect_superset (rx, solution);
1078 rx_release_superset (rx, old_sol);
1082 /* It is possible that the RX_DISTINCT_FUTURE we are working on has
1083 * the empty set of NFA states as its definition. In that case, this
1084 * is a failure point.
1086 if (solution == nil_set)
1088 df->future_frame.inx = (void *) rx_backtrack;
1089 df->future_frame.data = 0;
1090 df->future_frame.data_2 = 0;
1093 dest = rx_superstate (rx, solution);
1094 rx_release_superset (rx, solution);
1099 struct rx_distinct_future *dft;
1101 df->prev_same_dest->next_same_dest = 0;
1105 dft->future_frame.inx = rx->instruction_table[rx_next_char];
1106 dft->future_frame.data = (void *) dest->transitions;
1107 dft->future_frame.data_2 = (void *) dest->contents->is_final;
1108 dft = dft->next_same_dest;
1110 df->prev_same_dest->next_same_dest = df;
1112 if (!dest->transition_refs)
1113 dest->transition_refs = df;
1116 struct rx_distinct_future *dft;
1117 dft = dest->transition_refs->next_same_dest;
1118 dest->transition_refs->next_same_dest = df->next_same_dest;
1119 df->next_same_dest->prev_same_dest = dest->transition_refs;
1120 df->next_same_dest = dft;
1121 dft->prev_same_dest = df;
1127 /* This takes a superstate and a character, and computes some edges
1128 * from the superstate NFA. In particular, this computes all edges
1129 * that lead from SUPERSTATE given CHR. This function also
1130 * computes the set of characters that share this edge set.
1131 * This returns 0 on allocation error.
1132 * The character set and list of edges are returned through
1133 * the paramters CSETOUT and DFOUT.
1138 compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
1139 rx_Bitset csetout, struct rx_superstate *superstate,
1143 compute_super_edge (rx, dfout, csetout, superstate, chr)
1145 struct rx_distinct_future **dfout;
1147 struct rx_superstate *superstate;
1151 struct rx_superset *stateset = superstate->contents;
1153 /* To compute the set of characters that share edges with CHR,
1154 * we start with the full character set, and subtract.
1156 rx_bitset_universe (rx->local_cset_size, csetout);
1159 /* Iterate over the NFA states in the superstate state-set. */
1160 while (stateset->car)
1162 struct rx_nfa_edge *e;
1163 for (e = stateset->car->edges; e; e = e->next)
1164 if (e->type == ne_cset)
1166 if (!RX_bitset_member (e->params.cset, chr))
1167 /* An edge that doesn't apply at least tells us some characters
1168 * that don't share the same edge set as CHR.
1170 rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
1173 /* If we find an NFA edge that applies, we make sure there
1174 * are corresponding edges in the superstate NFA.
1177 struct rx_distinct_future * saved;
1179 *dfout = include_futures (rx, *dfout, e->dest, superstate);
1182 struct rx_distinct_future * df;
1185 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
1188 struct rx_distinct_future *dft;
1190 df = df->next_same_super_edge[0];
1192 if (dft->future && dft->future->transition_refs == dft)
1194 dft->future->transition_refs = dft->next_same_dest;
1195 if (dft->future->transition_refs == dft)
1196 dft->future->transition_refs = 0;
1198 dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
1199 dft->prev_same_dest->next_same_dest = dft->next_same_dest;
1200 rx_cache_free (rx->cache, sizeof (*dft), (char *)dft);
1205 /* We also trim the character set a bit. */
1206 rx_bitset_intersection (rx->local_cset_size,
1207 csetout, e->params.cset);
1210 stateset = stateset->cdr;
1216 /* This is a constructor for RX_SUPER_EDGE structures. These are
1217 * wrappers for lists of superstate NFA edges that share character sets labels.
1218 * If a transition class contains more than one rx_distinct_future (superstate
1219 * edge), then it represents a non-determinism in the superstate NFA.
1223 static struct rx_super_edge *
1224 rx_super_edge (struct rx *rx,
1225 struct rx_superstate *super, rx_Bitset cset,
1226 struct rx_distinct_future *df)
1228 static struct rx_super_edge *
1229 rx_super_edge (rx, super, cset, df)
1231 struct rx_superstate *super;
1233 struct rx_distinct_future *df;
1236 struct rx_super_edge *tc;
1239 tc_size = ( sizeof (struct rx_super_edge)
1240 + rx_sizeof_bitset (rx->local_cset_size));
1242 tc = ((struct rx_super_edge *)
1243 rx_cache_malloc (rx->cache, tc_size));
1248 tc->next = super->edges;
1250 tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
1251 tc->rx_backtrack_frame.data = 0;
1252 tc->rx_backtrack_frame.data_2 = (void *) tc;
1254 tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc));
1255 rx_bitset_assign (rx->local_cset_size, tc->cset, cset);
1258 struct rx_distinct_future * dfp = df;
1259 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
1263 dfp = dfp->next_same_super_edge[0];
1265 df->next_same_super_edge[1]->next_same_super_edge[0] = df;
1271 /* There are three kinds of cache miss. The first occurs when a
1272 * transition is taken that has never been computed during the
1273 * lifetime of the source superstate. That cache miss is handled by
1274 * calling COMPUTE_SUPER_EDGE. The second kind of cache miss
1275 * occurs when the destination superstate of a transition doesn't
1276 * exist. SOLVE_DESTINATION is used to construct the destination superstate.
1277 * Finally, the third kind of cache miss occurs when the destination
1278 * superstate of a transition is in a `semi-free state'. That case is
1279 * handled by UNFREE_SUPERSTATE.
1281 * The function of HANDLE_CACHE_MISS is to figure out which of these
1287 install_partial_transition (struct rx_superstate *super,
1288 struct rx_inx *answer,
1289 RX_subset set, int offset)
1292 install_partial_transition (super, answer, set, offset)
1293 struct rx_superstate *super;
1294 struct rx_inx *answer;
1300 int end = start + 32;
1302 struct rx_inx * transitions = super->transitions;
1307 transitions[start] = *answer;
1316 rx_handle_cache_miss (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data)
1319 rx_handle_cache_miss (rx, super, chr, data)
1321 struct rx_superstate *super;
1326 int offset = chr / RX_subset_bits;
1327 struct rx_distinct_future *df = data;
1329 if (!df) /* must be the shared_cache_miss_frame */
1331 /* Perhaps this is just a transition waiting to be filled. */
1332 struct rx_super_edge *tc;
1333 RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
1335 for (tc = super->edges; tc; tc = tc->next)
1336 if (tc->cset[offset] & mask)
1338 struct rx_inx * answer;
1340 answer = ((tc->options->next_same_super_edge[0] != tc->options)
1341 ? &tc->rx_backtrack_frame
1343 ? &df->side_effects_frame
1344 : &df->future_frame));
1345 install_partial_transition (super, answer,
1346 tc->cset [offset], offset * 32);
1349 /* Otherwise, it's a flushed or newly encountered edge. */
1351 char cset_space[1024]; /* this limit is far from unreasonable */
1353 struct rx_inx *answer;
1355 if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
1356 return 0; /* If the arbitrary limit is hit, always fail */
1358 trcset = (rx_Bitset)cset_space;
1359 rx_lock_superstate (rx, super);
1360 if (!compute_super_edge (rx, &df, trcset, super, chr))
1362 rx_unlock_superstate (rx, super);
1365 if (!df) /* We just computed the fail transition. */
1367 static struct rx_inx
1368 shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
1369 answer = &shared_fail_frame;
1373 tc = rx_super_edge (rx, super, trcset, df);
1376 rx_unlock_superstate (rx, super);
1379 answer = ((tc->options->next_same_super_edge[0] != tc->options)
1380 ? &tc->rx_backtrack_frame
1382 ? &df->side_effects_frame
1383 : &df->future_frame));
1385 install_partial_transition (super, answer,
1386 trcset[offset], offset * 32);
1387 rx_unlock_superstate (rx, super);
1391 else if (df->future) /* A cache miss on an edge with a future? Must be
1392 * a semi-free destination. */
1394 if (df->future->is_semifree)
1395 refresh_semifree_superstate (rx->cache, df->future);
1396 return &df->future_frame;
1399 /* no future superstate on an existing edge */
1401 rx_lock_superstate (rx, super);
1402 if (!solve_destination (rx, df))
1404 rx_unlock_superstate (rx, super);
1408 && (df->edge->options->next_same_super_edge[0] == df->edge->options))
1409 install_partial_transition (super, &df->future_frame,
1410 df->edge->cset[offset], offset * 32);
1411 rx_unlock_superstate (rx, super);
1412 return &df->future_frame;