fix possible crash on user deletion
[srvx.git] / rx / rxsuper.c
1
2
3 /*      Copyright (C) 1995, 1996 Tom Lord
4  * 
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Library General Public License as published by
7  * the Free Software Foundation; either version 2, or (at your option)
8  * any later version.
9  * 
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Library General Public License for more details.
14  * 
15  * You should have received a copy of the GNU Library General Public License
16  * along with this software; see the file COPYING.  If not, write to
17  * the Free Software Foundation, 59 Temple Place - Suite 330, 
18  * Boston, MA 02111-1307, USA. 
19  */
20
21 \f
22
23 #include "rxall.h"
24 #include "rxsuper.h"
25
26 \f
27 /* The functions in the next several pages define the lazy-NFA-conversion used
28  * by matchers.  The input to this construction is an NFA such as 
29  * is built by compactify_nfa (rx.c).  The output is the superNFA.
30  */
31
32 /* Match engines can use arbitrary values for opcodes.  So, the parse tree 
33  * is built using instructions names (enum rx_opcode), but the superstate
34  * nfa is populated with mystery opcodes (void *).
35  *
36  * For convenience, here is an id table.  The opcodes are == to their inxs
37  *
38  * The lables in re_search_2 would make good values for instructions.
39  */
40
41 void * rx_id_instruction_table[rx_num_instructions] =
42 {
43   (void *) rx_backtrack_point,
44   (void *) rx_do_side_effects,
45   (void *) rx_cache_miss,
46   (void *) rx_next_char,
47   (void *) rx_backtrack,
48   (void *) rx_error_inx
49 };
50
51 \f
52
53
54 /* The partially instantiated superstate graph has a transition 
55  * table at every node.  There is one entry for every character.
56  * This fills in the transition for a set.
57  */
58 #ifdef __STDC__
59 static void 
60 install_transition (struct rx_superstate *super,
61                     struct rx_inx *answer, rx_Bitset trcset) 
62 #else
63 static void 
64 install_transition (super, answer, trcset)
65      struct rx_superstate *super;
66      struct rx_inx *answer;
67      rx_Bitset trcset;
68 #endif
69 {
70   struct rx_inx * transitions = super->transitions;
71   int chr;
72   for (chr = 0; chr < 256; )
73     if (!*trcset)
74       {
75         ++trcset;
76         chr += 32;
77       }
78     else
79       {
80         RX_subset sub = *trcset;
81         RX_subset mask = 1;
82         int bound = chr + 32;
83         while (chr < bound)
84           {
85             if (sub & mask)
86               transitions [chr] = *answer;
87             ++chr;
88             mask <<= 1;
89           }
90         ++trcset;
91       }
92 }
93
94
95 #ifdef __STDC__
96 static int
97 qlen (struct rx_superstate * q)
98 #else
99 static int
100 qlen (q)
101      struct rx_superstate * q;
102 #endif
103 {
104   int count = 1;
105   struct rx_superstate * it;
106   if (!q)
107     return 0;
108   for (it = q->next_recyclable; it != q; it = it->next_recyclable)
109     ++count;
110   return count;
111 }
112
113 #ifdef __STDC__
114 static void
115 check_cache (struct rx_cache * cache)
116 #else
117 static void
118 check_cache (cache)
119      struct rx_cache * cache;
120 #endif
121 {
122   struct rx_cache * you_fucked_up = 0;
123   int total = cache->superstates;
124   int semi = cache->semifree_superstates;
125   if (semi != qlen (cache->semifree_superstate))
126     check_cache (you_fucked_up);
127   if ((total - semi) != qlen (cache->lru_superstate))
128     check_cache (you_fucked_up);
129 }
130 #ifdef __STDC__
131 char *
132 rx_cache_malloc (struct rx_cache * cache, int size)
133 #else
134 char *
135 rx_cache_malloc (cache, size)
136      struct rx_cache * cache;
137      int size;
138 #endif
139 {
140   char * answer;
141   answer = (char *)malloc (size);
142   if (answer)
143     cache->bytes_used += size;
144   return answer;
145 }
146
147
148 #ifdef __STDC__
149 void
150 rx_cache_free (struct rx_cache * cache, int size, char * mem)
151 #else
152 void
153 rx_cache_free (cache, size, mem)
154      struct rx_cache * cache;
155      int size;
156      char * mem;
157 #endif
158 {
159   free (mem);
160   cache->bytes_used -= size;
161 }
162
163
164
165 /* When a superstate is old and neglected, it can enter a 
166  * semi-free state.  A semi-free state is slated to die.
167  * Incoming transitions to a semi-free state are re-written
168  * to cause an (interpreted) fault when they are taken.
169  * The fault handler revives the semi-free state, patches
170  * incoming transitions back to normal, and continues.
171  *
172  * The idea is basicly to free in two stages, aborting 
173  * between the two if the state turns out to be useful again.
174  * When a free is aborted, the rescued superstate is placed
175  * in the most-favored slot to maximize the time until it
176  * is next semi-freed.
177  *
178  * Overall, this approximates truly freeing superstates in
179  * lru order, but does not involve the cost of updating perfectly 
180  * accurate timestamps every time a superstate is touched.
181  */
182
183 #ifdef __STDC__
184 static void
185 semifree_superstate (struct rx_cache * cache)
186 #else
187 static void
188 semifree_superstate (cache)
189      struct rx_cache * cache;
190 #endif
191 {
192   int disqualified = cache->semifree_superstates;
193   if (disqualified == cache->superstates)
194     return;
195   while (cache->lru_superstate->locks)
196     {
197       cache->lru_superstate = cache->lru_superstate->next_recyclable;
198       ++disqualified;
199       if (disqualified == cache->superstates)
200         return;
201     }
202   {
203     struct rx_superstate * it = cache->lru_superstate;
204     it->next_recyclable->prev_recyclable = it->prev_recyclable;
205     it->prev_recyclable->next_recyclable = it->next_recyclable;
206     cache->lru_superstate = (it == it->next_recyclable
207                              ? 0
208                              : it->next_recyclable);
209     if (!cache->semifree_superstate)
210       {
211         cache->semifree_superstate = it;
212         it->next_recyclable = it;
213         it->prev_recyclable = it;
214       }
215     else
216       {
217         it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
218         it->next_recyclable = cache->semifree_superstate;
219         it->prev_recyclable->next_recyclable = it;
220         it->next_recyclable->prev_recyclable = it;
221       }
222     {
223       struct rx_distinct_future *df;
224       it->is_semifree = 1;
225       ++cache->semifree_superstates;
226       df = it->transition_refs;
227       if (df)
228         {
229           df->prev_same_dest->next_same_dest = 0;
230           for (df = it->transition_refs; df; df = df->next_same_dest)
231             {
232               df->future_frame.inx = cache->instruction_table[rx_cache_miss];
233               df->future_frame.data = 0;
234               df->future_frame.data_2 = (void *) df;
235               /* If there are any NEXT-CHAR instruction frames that
236                * refer to this state, we convert them to CACHE-MISS frames.
237                */
238               if (!df->effects
239                   && (df->edge->options->next_same_super_edge[0]
240                       == df->edge->options))
241                 install_transition (df->present, &df->future_frame,
242                                     df->edge->cset);
243             }
244           df = it->transition_refs;
245           df->prev_same_dest->next_same_dest = df;
246         }
247     }
248   }
249 }
250
251
252 #ifdef __STDC__
253 static void 
254 refresh_semifree_superstate (struct rx_cache * cache,
255                              struct rx_superstate * super)
256 #else
257 static void 
258 refresh_semifree_superstate (cache, super)
259      struct rx_cache * cache;
260      struct rx_superstate * super;
261 #endif
262 {
263   struct rx_distinct_future *df;
264
265   if (super->transition_refs)
266     {
267       super->transition_refs->prev_same_dest->next_same_dest = 0; 
268       for (df = super->transition_refs; df; df = df->next_same_dest)
269         {
270           df->future_frame.inx = cache->instruction_table[rx_next_char];
271           df->future_frame.data = (void *) super->transitions;
272           df->future_frame.data_2 = (void *)(super->contents->is_final);
273           /* CACHE-MISS instruction frames that refer to this state,
274            * must be converted to NEXT-CHAR frames.
275            */
276           if (!df->effects
277               && (df->edge->options->next_same_super_edge[0]
278                   == df->edge->options))
279             install_transition (df->present, &df->future_frame,
280                                 df->edge->cset);
281         }
282       super->transition_refs->prev_same_dest->next_same_dest
283         = super->transition_refs;
284     }
285   if (cache->semifree_superstate == super)
286     cache->semifree_superstate = (super->prev_recyclable == super
287                                   ? 0
288                                   : super->prev_recyclable);
289   super->next_recyclable->prev_recyclable = super->prev_recyclable;
290   super->prev_recyclable->next_recyclable = super->next_recyclable;
291
292   if (!cache->lru_superstate)
293     (cache->lru_superstate
294      = super->next_recyclable
295      = super->prev_recyclable
296      = super);
297   else
298     {
299       super->next_recyclable = cache->lru_superstate;
300       super->prev_recyclable = cache->lru_superstate->prev_recyclable;
301       super->next_recyclable->prev_recyclable = super;
302       super->prev_recyclable->next_recyclable = super;
303     }
304   super->is_semifree = 0;
305   --cache->semifree_superstates;
306 }
307
308 #ifdef __STDC__
309 void
310 rx_refresh_this_superstate (struct rx_cache * cache,
311                             struct rx_superstate * superstate)
312 #else
313 void
314 rx_refresh_this_superstate (cache, superstate)
315      struct rx_cache * cache;
316      struct rx_superstate * superstate;
317 #endif
318 {
319   if (superstate->is_semifree)
320     refresh_semifree_superstate (cache, superstate);
321   else if (cache->lru_superstate == superstate)
322     cache->lru_superstate = superstate->next_recyclable;
323   else if (superstate != cache->lru_superstate->prev_recyclable)
324     {
325       superstate->next_recyclable->prev_recyclable
326         = superstate->prev_recyclable;
327       superstate->prev_recyclable->next_recyclable
328         = superstate->next_recyclable;
329       superstate->next_recyclable = cache->lru_superstate;
330       superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
331       superstate->next_recyclable->prev_recyclable = superstate;
332       superstate->prev_recyclable->next_recyclable = superstate;
333     }
334 }
335
336 #ifdef __STDC__
337 static void 
338 release_superset_low (struct rx_cache * cache,
339                      struct rx_superset *set)
340 #else
341 static void 
342 release_superset_low (cache, set)
343      struct rx_cache * cache;
344      struct rx_superset *set;
345 #endif
346 {
347   if (!--set->refs)
348     {
349       if (set->starts_for)
350         set->starts_for->start_set = 0;
351
352       if (set->cdr)
353         release_superset_low (cache, set->cdr);
354
355       rx_hash_free (&set->hash_item, &cache->superset_hash_rules);
356       rx_cache_free (cache,
357                      sizeof (struct rx_superset),
358                      (char *)set);
359     }
360 }
361
362 #ifdef __STDC__
363 void 
364 rx_release_superset (struct rx *rx,
365                      struct rx_superset *set)
366 #else
367 void 
368 rx_release_superset (rx, set)
369      struct rx *rx;
370      struct rx_superset *set;
371 #endif
372 {
373   release_superset_low (rx->cache, set);
374 }
375
376 /* This tries to add a new superstate to the superstate freelist.
377  * It might, as a result, free some edge pieces or hash tables.
378  * If nothing can be freed because too many locks are being held, fail.
379  */
380
381 #ifdef __STDC__
382 static int
383 rx_really_free_superstate (struct rx_cache * cache)
384 #else
385 static int
386 rx_really_free_superstate (cache)
387      struct rx_cache * cache;
388 #endif
389 {
390   int locked_superstates = 0;
391   struct rx_superstate * it;
392
393   if (!cache->superstates)
394     return 0;
395
396   /* scale these */
397   while ((cache->hits + cache->misses) > cache->superstates)
398     {
399       cache->hits >>= 1;
400       cache->misses >>= 1;
401     }
402
403   /* semifree superstates faster than they are freed
404    * so popular states can be rescued.
405    */
406   semifree_superstate (cache);
407   semifree_superstate (cache);
408   semifree_superstate (cache);
409
410 #if 0
411   Redundant because semifree_superstate already 
412     makes this check;
413
414
415   while (cache->semifree_superstate && cache->semifree_superstate->locks)
416     {
417       refresh_semifree_superstate (cache, cache->semifree_superstate);
418       ++locked_superstates;
419       if (locked_superstates == cache->superstates)
420         return 0;
421     }
422
423
424   if (cache->semifree_superstate)
425     insert the "it =" block which follows this "if 0" code;
426   else
427     {
428       while (cache->lru_superstate->locks)
429         {
430           cache->lru_superstate = cache->lru_superstate->next_recyclable;
431           ++locked_superstates;
432           if (locked_superstates == cache->superstates)
433             return 0;
434         }
435       it = cache->lru_superstate;
436       it->next_recyclable->prev_recyclable = it->prev_recyclable;
437       it->prev_recyclable->next_recyclable = it->next_recyclable;
438       cache->lru_superstate = ((it == it->next_recyclable)
439                                     ? 0
440                                     : it->next_recyclable);
441     }
442 #endif
443
444     if (!cache->semifree_superstate)
445       return 0;
446
447     {
448       it = cache->semifree_superstate;
449       it->next_recyclable->prev_recyclable = it->prev_recyclable;
450       it->prev_recyclable->next_recyclable = it->next_recyclable;
451       cache->semifree_superstate = ((it == it->next_recyclable)
452                                     ? 0
453                                     : it->next_recyclable);
454       --cache->semifree_superstates;
455     }
456
457
458   if (it->transition_refs)
459     {
460       struct rx_distinct_future *df;
461       for (df = it->transition_refs,
462            df->prev_same_dest->next_same_dest = 0;
463            df;
464            df = df->next_same_dest)
465         {
466           df->future_frame.inx = cache->instruction_table[rx_cache_miss];
467           df->future_frame.data = 0;
468           df->future_frame.data_2 = (void *) df;
469           df->future = 0;
470         }
471       it->transition_refs->prev_same_dest->next_same_dest =
472         it->transition_refs;
473     }
474   {
475     struct rx_super_edge *tc = it->edges;
476     while (tc)
477       {
478         struct rx_distinct_future * df;
479         struct rx_super_edge *tct = tc->next;
480         df = tc->options;
481         df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
482         while (df)
483           {
484             struct rx_distinct_future *dft = df;
485             df = df->next_same_super_edge[0];
486             
487             
488             if (dft->future && dft->future->transition_refs == dft)
489               {
490                 dft->future->transition_refs = dft->next_same_dest;
491                 if (dft->future->transition_refs == dft)
492                   dft->future->transition_refs = 0;
493               }
494             dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
495             dft->prev_same_dest->next_same_dest = dft->next_same_dest;
496             rx_cache_free (cache,
497                            sizeof (struct rx_distinct_future),
498                            (char *)dft);
499           }
500         rx_cache_free (cache,
501                        sizeof (struct rx_super_edge),
502                        (char *)tc);
503         tc = tct;
504       }
505   }
506   
507   if (it->contents->superstate == it)
508     it->contents->superstate = 0;
509   release_superset_low (cache, it->contents);
510   rx_cache_free (cache,
511                  (sizeof (struct rx_superstate)
512                   + cache->local_cset_size * sizeof (struct rx_inx)),
513                  (char *)it);
514   --cache->superstates;
515   return 1;
516 }
517
518
519 #ifdef __STDC__
520 static char *
521 rx_cache_malloc_or_get (struct rx_cache * cache, int size)
522 #else
523 static char *
524 rx_cache_malloc_or_get (cache, size)
525      struct rx_cache * cache;
526      int size;
527 #endif
528 {
529   while (   (cache->bytes_used + size > cache->bytes_allowed)
530          && rx_really_free_superstate (cache))
531     ;
532
533   return rx_cache_malloc (cache, size);
534 }
535
536 \f
537
538 #ifdef __STDC__
539 static int
540 supersetcmp (void * va, void * vb)
541 #else
542 static int
543 supersetcmp (va, vb)
544      void * va;
545      void * vb;
546 #endif
547 {
548   struct rx_superset * a = (struct rx_superset *)va;
549   struct rx_superset * b = (struct rx_superset *)vb;
550   return (   (a == b)
551           || (a && b && (a->id == b->id) && (a->car == b->car) && (a->cdr == b->cdr)));
552 }
553
554 #define rx_abs(A) (((A) > 0) ? (A) : -(A))
555
556
557 #ifdef __STDC__
558 static struct rx_hash_item *
559 superset_allocator (struct rx_hash_rules * rules, void * val)
560 #else
561 static struct rx_hash_item *
562 superset_allocator (rules, val)
563      struct rx_hash_rules * rules;
564      void * val;
565 #endif
566 {
567   struct rx_cache * cache;
568   struct rx_superset * template;
569   struct rx_superset * newset;
570
571   cache = ((struct rx_cache *)
572            ((char *)rules
573             - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
574   template = (struct rx_superset *)val;
575   newset = ((struct rx_superset *)
576             rx_cache_malloc (cache, sizeof (*template)));
577   if (!newset)
578     return 0;
579   {
580     int cdrfinal;
581     int cdredges;
582
583     cdrfinal = (template->cdr
584                 ? template->cdr->is_final
585                 : 0);
586     cdredges = (template->cdr
587                 ? template->cdr->has_cset_edges
588                 : 0);
589     
590     newset->is_final = (rx_abs (template->car->is_final) > rx_abs(cdrfinal)
591                         ? template->car->is_final
592                         : template->cdr->is_final);
593     newset->has_cset_edges = (template->car->has_cset_edges || cdredges);
594   }
595   newset->refs = 0;
596   newset->id = template->id;
597   newset->car = template->car;
598   newset->cdr = template->cdr;
599   rx_protect_superset (rx, template->cdr);
600   newset->superstate = 0;
601   newset->starts_for = 0;
602   newset->hash_item.data = (void *)newset;
603   newset->hash_item.binding = 0;
604   return &newset->hash_item;
605 }
606
607 #ifdef __STDC__
608 static struct rx_hash * 
609 super_hash_allocator (struct rx_hash_rules * rules)
610 #else
611 static struct rx_hash * 
612 super_hash_allocator (rules)
613      struct rx_hash_rules * rules;
614 #endif
615 {
616   struct rx_cache * cache;
617   cache = ((struct rx_cache *)
618            ((char *)rules
619             - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
620   return ((struct rx_hash *)
621           rx_cache_malloc (cache, sizeof (struct rx_hash)));
622 }
623
624
625 #ifdef __STDC__
626 static void
627 super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
628 #else
629 static void
630 super_hash_liberator (hash, rules)
631      struct rx_hash * hash;
632      struct rx_hash_rules * rules;
633 #endif
634 {
635   struct rx_cache * cache
636     = ((struct rx_cache *)
637        (char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules));
638   rx_cache_free (cache, sizeof (struct rx_hash), (char *)hash);
639 }
640
641 #ifdef __STDC__
642 static void
643 superset_hash_item_liberator (struct rx_hash_item * it,
644                               struct rx_hash_rules * rules)
645 #else
646 static void
647 superset_hash_item_liberator (it, rules)
648      struct rx_hash_item * it;
649      struct rx_hash_rules * rules;
650 #endif
651 {
652 }
653
654 int rx_cache_bound = 3;
655 static int rx_default_cache_got = 0;
656
657 static struct rx_cache default_cache = 
658 {
659   {
660     supersetcmp,
661     super_hash_allocator,
662     super_hash_liberator,
663     superset_allocator,
664     superset_hash_item_liberator,
665   },
666   0,
667   0,
668   0,
669   0,
670   0,
671   0,
672   0,
673   RX_DEFAULT_DFA_CACHE_SIZE,
674   0,
675   256,
676   rx_id_instruction_table,
677   {
678     0,
679     0,
680     0,
681     0,
682     0
683   }
684 };
685 struct rx_cache * rx_default_cache = &default_cache;
686
687 /* This adds an element to a superstate set.  These sets are lists, such
688  * that lists with == elements are ==.  The empty set is returned by
689  * superset_cons (rx, 0, 0) and is NOT equivelent to 
690  * (struct rx_superset)0.
691  */
692
693 #ifdef __STDC__
694 struct rx_superset *
695 rx_superset_cons (struct rx * rx,
696                   struct rx_nfa_state *car, struct rx_superset *cdr)
697 #else
698 struct rx_superset *
699 rx_superset_cons (rx, car, cdr)
700      struct rx * rx;
701      struct rx_nfa_state *car;
702      struct rx_superset *cdr;
703 #endif
704 {
705   struct rx_cache * cache = rx->cache;
706   if (!car && !cdr)
707     {
708       if (!cache->empty_superset)
709         {
710           cache->empty_superset
711             = ((struct rx_superset *)
712                rx_cache_malloc (cache, sizeof (struct rx_superset)));
713           if (!cache->empty_superset)
714             return 0;
715           rx_bzero ((char *)cache->empty_superset, sizeof (struct rx_superset));
716           cache->empty_superset->refs = 1000;
717         }
718       return cache->empty_superset;
719     }
720   {
721     struct rx_superset template;
722     struct rx_hash_item * hit;
723     template.car = car;
724     template.cdr = cdr;
725     template.id = rx->rx_id;
726     rx_protect_superset (rx, template.cdr);
727     hit = rx_hash_store (&cache->superset_table,
728                          (unsigned long)car ^ car->id ^ (unsigned long)cdr,
729                          (void *)&template,
730                          &cache->superset_hash_rules);
731     rx_protect_superset (rx, template.cdr);
732     return (hit
733             ?  (struct rx_superset *)hit->data
734             : 0);
735   }
736 }
737
738 /* This computes a union of two NFA state sets.  The sets do not have the
739  * same representation though.  One is a RX_SUPERSET structure (part
740  * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
741  */
742
743 #ifdef __STDC__
744 struct rx_superset *
745 rx_superstate_eclosure_union (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl) 
746 #else
747 struct rx_superset *
748 rx_superstate_eclosure_union (rx, set, ecl)
749      struct rx * rx;
750      struct rx_superset *set;
751      struct rx_nfa_state_set *ecl;
752 #endif
753 {
754   if (!ecl)
755     return set;
756
757   if (!set->car)
758     return rx_superset_cons (rx, ecl->car,
759                              rx_superstate_eclosure_union (rx, set, ecl->cdr));
760   if (set->car == ecl->car)
761     return rx_superstate_eclosure_union (rx, set, ecl->cdr);
762
763   {
764     struct rx_superset * tail;
765     struct rx_nfa_state * first;
766
767     if (set->car->id < ecl->car->id)
768       {
769         tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
770         first = set->car;
771       }
772     else
773       {
774         tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
775         first = ecl->car;
776       }
777     if (!tail)
778       return 0;
779     else
780       {
781         struct rx_superset * answer;
782         answer = rx_superset_cons (rx, first, tail);
783         if (!answer)
784           {
785             rx_protect_superset (rx, tail);
786             rx_release_superset (rx, tail);
787             return 0;
788           }
789         else
790           return answer;
791       }
792   }
793 }
794
795
796 \f
797
798 /*
799  * This makes sure that a list of rx_distinct_futures contains
800  * a future for each possible set of side effects in the eclosure
801  * of a given state.  This is some of the work of filling in a
802  * superstate transition. 
803  */
804
805 #ifdef __STDC__
806 static struct rx_distinct_future *
807 include_futures (struct rx *rx,
808                  struct rx_distinct_future *df, struct rx_nfa_state
809                  *state, struct rx_superstate *superstate) 
810 #else
811 static struct rx_distinct_future *
812 include_futures (rx, df, state, superstate)
813      struct rx *rx;
814      struct rx_distinct_future *df;
815      struct rx_nfa_state *state;
816      struct rx_superstate *superstate;
817 #endif
818 {
819   struct rx_possible_future *future;
820   struct rx_cache * cache = rx->cache;
821   for (future = rx_state_possible_futures (rx, state); future; future = future->next)
822     {
823       struct rx_distinct_future *dfp;
824       struct rx_distinct_future *insert_before = 0;
825       if (df)
826         df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
827       for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
828         if (dfp->effects == future->effects)
829           break;
830         else
831           {
832             int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
833             if (order > 0)
834               {
835                 insert_before = dfp;
836                 dfp = 0;
837                 break;
838               }
839           }
840       if (df)
841         df->next_same_super_edge[1]->next_same_super_edge[0] = df;
842       if (!dfp)
843         {
844           dfp
845             = ((struct rx_distinct_future *)
846                rx_cache_malloc (cache,
847                                 sizeof (struct rx_distinct_future)));
848           if (!dfp)
849             return 0;
850           if (!df)
851             {
852               df = insert_before = dfp;
853               df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
854             }
855           else if (!insert_before)
856             insert_before = df;
857           else if (insert_before == df)
858             df = dfp;
859
860           dfp->next_same_super_edge[0] = insert_before;
861           dfp->next_same_super_edge[1]
862             = insert_before->next_same_super_edge[1];
863           dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
864           dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
865           dfp->next_same_dest = dfp->prev_same_dest = dfp;
866           dfp->future = 0;
867           dfp->present = superstate;
868           dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
869           dfp->future_frame.data = 0;
870           dfp->future_frame.data_2 = (void *) dfp;
871           dfp->side_effects_frame.inx
872             = rx->instruction_table[rx_do_side_effects];
873           dfp->side_effects_frame.data = 0;
874           dfp->side_effects_frame.data_2 = (void *) dfp;
875           dfp->effects = future->effects;
876         }
877     }
878   return df;
879 }
880 \f
881
882
883 /* This constructs a new superstate from its state set.  The only 
884  * complexity here is memory management.
885  */
886 #ifdef __STDC__
887 struct rx_superstate *
888 rx_superstate (struct rx *rx,
889                struct rx_superset *set)
890 #else
891 struct rx_superstate *
892 rx_superstate (rx, set)
893      struct rx *rx;
894      struct rx_superset *set;
895 #endif
896 {
897   struct rx_cache * cache = rx->cache;
898   struct rx_superstate * superstate = 0;
899
900   /* Does the superstate already exist in the cache? */
901   if (set->superstate)
902     {
903       if (set->superstate->rx_id != rx->rx_id)
904         {
905           /* Aha.  It is in the cache, but belongs to a superstate
906            * that refers to an NFA that no longer exists.
907            * (We know it no longer exists because it was evidently
908            *  stored in the same region of memory as the current nfa
909            *  yet it has a different id.)
910            */
911           superstate = set->superstate;
912           if (!superstate->is_semifree)
913             {
914               if (cache->lru_superstate == superstate)
915                 {
916                   cache->lru_superstate = superstate->next_recyclable;
917                   if (cache->lru_superstate == superstate)
918                     cache->lru_superstate = 0;
919                 }
920               {
921                 superstate->next_recyclable->prev_recyclable
922                   = superstate->prev_recyclable;
923                 superstate->prev_recyclable->next_recyclable
924                   = superstate->next_recyclable;
925                 if (!cache->semifree_superstate)
926                   {
927                     (cache->semifree_superstate
928                      = superstate->next_recyclable
929                      = superstate->prev_recyclable
930                      = superstate);
931                   }
932                 else
933                   {
934                     superstate->next_recyclable = cache->semifree_superstate;
935                     superstate->prev_recyclable
936                       = cache->semifree_superstate->prev_recyclable;
937                     superstate->next_recyclable->prev_recyclable
938                       = superstate;
939                     superstate->prev_recyclable->next_recyclable
940                       = superstate;
941                     cache->semifree_superstate = superstate;
942                   }
943                 ++cache->semifree_superstates;
944               }
945             }
946           set->superstate = 0;
947           goto handle_cache_miss;
948         }
949       ++cache->hits;
950       superstate = set->superstate;
951
952       rx_refresh_this_superstate (cache, superstate);
953       return superstate;
954     }
955
956  handle_cache_miss:
957
958   /* This point reached only for cache misses. */
959   ++cache->misses;
960 #if RX_DEBUG
961   if (rx_debug_trace > 1)
962     {
963       struct rx_superset * setp = set;
964       fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
965       while (setp)
966         {
967           fprintf (stderr, "%d ", setp->id);
968           setp = setp->cdr;
969         }
970       fprintf (stderr, "(%d)\n", set);
971     }
972 #endif
973
974   {
975     int superstate_size;
976     superstate_size = (  sizeof (*superstate)
977                        + (  sizeof (struct rx_inx)
978                           * rx->local_cset_size));
979     superstate = ((struct rx_superstate *)
980                   rx_cache_malloc_or_get (cache, superstate_size));
981     ++cache->superstates;
982   }                                                            
983
984   if (!superstate)
985     return 0;
986
987   if (!cache->lru_superstate)
988     (cache->lru_superstate
989      = superstate->next_recyclable
990      = superstate->prev_recyclable
991      = superstate);
992   else
993     {
994       superstate->next_recyclable = cache->lru_superstate;
995       superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
996       (  superstate->prev_recyclable->next_recyclable
997        = superstate->next_recyclable->prev_recyclable
998        = superstate);
999     }
1000   superstate->rx_id = rx->rx_id;
1001   superstate->transition_refs = 0;
1002   superstate->locks = 0;
1003   superstate->is_semifree = 0;
1004   set->superstate = superstate;
1005   superstate->contents = set;
1006   rx_protect_superset (rx, set);
1007   superstate->edges = 0;
1008   {
1009     int x;
1010     /* None of the transitions from this superstate are known yet. */
1011     for (x = 0; x < rx->local_cset_size; ++x)
1012       {
1013         struct rx_inx * ifr = &superstate->transitions[x];
1014         ifr->inx = rx->instruction_table [rx_cache_miss];
1015         ifr->data = ifr->data_2 = 0;
1016       }
1017   }
1018   return superstate;
1019 }
1020 \f
1021
1022 /* This computes the destination set of one edge of the superstate NFA.
1023  * Note that a RX_DISTINCT_FUTURE is a superstate edge.
1024  * Returns 0 on an allocation failure.
1025  */
1026
1027 #ifdef __STDC__
1028 static int 
1029 solve_destination (struct rx *rx, struct rx_distinct_future *df)
1030 #else
1031 static int 
1032 solve_destination (rx, df)
1033      struct rx *rx;
1034      struct rx_distinct_future *df;
1035 #endif
1036 {
1037   struct rx_super_edge *tc = df->edge;
1038   struct rx_superset *nfa_state;
1039   struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
1040   struct rx_superset *solution = nil_set;
1041   struct rx_superstate *dest;
1042
1043   rx_protect_superset (rx, solution);
1044   /* Iterate over all NFA states in the state set of this superstate. */
1045   for (nfa_state = df->present->contents;
1046        nfa_state->car;
1047        nfa_state = nfa_state->cdr)
1048     {
1049       struct rx_nfa_edge *e;
1050       /* Iterate over all edges of each NFA state. */
1051       for (e = nfa_state->car->edges; e; e = e->next)
1052         /* If we find an edge that is labeled with 
1053          * the characters we are solving for.....
1054          */
1055         if ((e->type == ne_cset)
1056             && rx_bitset_is_subset (rx->local_cset_size,
1057                                     tc->cset,
1058                                     e->params.cset))
1059           {
1060             struct rx_nfa_state *n = e->dest;
1061             struct rx_possible_future *pf;
1062             /* ....search the partial epsilon closures of the destination
1063              * of that edge for a path that involves the same set of
1064              * side effects we are solving for.
1065              * If we find such a RX_POSSIBLE_FUTURE, we add members to the
1066              * stateset we are computing.
1067              */
1068             for (pf = rx_state_possible_futures (rx, n); pf; pf = pf->next)
1069               if (pf->effects == df->effects)
1070                 {
1071                   struct rx_superset * old_sol;
1072                   old_sol = solution;
1073                   solution = rx_superstate_eclosure_union (rx, solution,
1074                                                            pf->destset);
1075                   if (!solution)
1076                     return 0;
1077                   rx_protect_superset (rx, solution);
1078                   rx_release_superset (rx, old_sol);
1079                 }
1080           }
1081     }
1082   /* It is possible that the RX_DISTINCT_FUTURE we are working on has 
1083    * the empty set of NFA states as its definition.  In that case, this
1084    * is a failure point.
1085    */
1086   if (solution == nil_set)
1087     {
1088       df->future_frame.inx = (void *) rx_backtrack;
1089       df->future_frame.data = 0;
1090       df->future_frame.data_2 = 0;
1091       return 1;
1092     }
1093   dest = rx_superstate (rx, solution);
1094   rx_release_superset (rx, solution);
1095   if (!dest)
1096     return 0;
1097
1098   {
1099     struct rx_distinct_future *dft;
1100     dft = df;
1101     df->prev_same_dest->next_same_dest = 0;
1102     while (dft)
1103       {
1104         dft->future = dest;
1105         dft->future_frame.inx = rx->instruction_table[rx_next_char];
1106         dft->future_frame.data = (void *) dest->transitions;
1107         dft->future_frame.data_2 = (void *) dest->contents->is_final;
1108         dft = dft->next_same_dest;
1109       }
1110     df->prev_same_dest->next_same_dest = df;
1111   }
1112   if (!dest->transition_refs)
1113     dest->transition_refs = df;
1114   else
1115     {
1116       struct rx_distinct_future *dft;
1117       dft = dest->transition_refs->next_same_dest;
1118       dest->transition_refs->next_same_dest = df->next_same_dest;
1119       df->next_same_dest->prev_same_dest = dest->transition_refs;
1120       df->next_same_dest = dft;
1121       dft->prev_same_dest = df;
1122     }
1123   return 1;
1124 }
1125
1126
1127 /* This takes a superstate and a character, and computes some edges
1128  * from the superstate NFA.  In particular, this computes all edges
1129  * that lead from SUPERSTATE given CHR.   This function also 
1130  * computes the set of characters that share this edge set.
1131  * This returns 0 on allocation error.
1132  * The character set and list of edges are returned through 
1133  * the paramters CSETOUT and DFOUT.
1134  */
1135
1136 #ifdef __STDC__
1137 static int 
1138 compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
1139                           rx_Bitset csetout, struct rx_superstate *superstate,
1140                           unsigned char chr)  
1141 #else
1142 static int 
1143 compute_super_edge (rx, dfout, csetout, superstate, chr)
1144      struct rx *rx;
1145      struct rx_distinct_future **dfout;
1146      rx_Bitset csetout;
1147      struct rx_superstate *superstate;
1148      unsigned char chr;
1149 #endif
1150 {
1151   struct rx_superset *stateset = superstate->contents;
1152
1153   /* To compute the set of characters that share edges with CHR, 
1154    * we start with the full character set, and subtract.
1155    */
1156   rx_bitset_universe (rx->local_cset_size, csetout);
1157   *dfout = 0;
1158
1159   /* Iterate over the NFA states in the superstate state-set. */
1160   while (stateset->car)
1161     {
1162       struct rx_nfa_edge *e;
1163       for (e = stateset->car->edges; e; e = e->next)
1164         if (e->type == ne_cset)
1165           {
1166             if (!RX_bitset_member (e->params.cset, chr))
1167               /* An edge that doesn't apply at least tells us some characters
1168                * that don't share the same edge set as CHR.
1169                */
1170               rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
1171             else
1172               {
1173                 /* If we find an NFA edge that applies, we make sure there
1174                  * are corresponding edges in the superstate NFA.
1175                  */
1176                 {
1177                   struct rx_distinct_future * saved;
1178                   saved = *dfout;
1179                   *dfout = include_futures (rx, *dfout, e->dest, superstate);
1180                   if (!*dfout)
1181                     {
1182                       struct rx_distinct_future * df;
1183                       df = saved;
1184                       if (df)
1185                         df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
1186                       while (df)
1187                         {
1188                           struct rx_distinct_future *dft;
1189                           dft = df;
1190                           df = df->next_same_super_edge[0];
1191
1192                           if (dft->future && dft->future->transition_refs == dft)
1193                             {
1194                               dft->future->transition_refs = dft->next_same_dest;
1195                               if (dft->future->transition_refs == dft)
1196                                 dft->future->transition_refs = 0;
1197                             }
1198                           dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
1199                           dft->prev_same_dest->next_same_dest = dft->next_same_dest;
1200                           rx_cache_free (rx->cache, sizeof (*dft), (char *)dft);
1201                         }
1202                       return 0;
1203                     }
1204                 }
1205                 /* We also trim the character set a bit. */
1206                 rx_bitset_intersection (rx->local_cset_size,
1207                                         csetout, e->params.cset);
1208               }
1209           }
1210       stateset = stateset->cdr;
1211     }
1212   return 1;
1213 }
1214 \f
1215
1216 /* This is a constructor for RX_SUPER_EDGE structures.  These are
1217  * wrappers for lists of superstate NFA edges that share character sets labels.
1218  * If a transition class contains more than one rx_distinct_future (superstate
1219  * edge), then it represents a non-determinism in the superstate NFA.
1220  */
1221
1222 #ifdef __STDC__
1223 static struct rx_super_edge *
1224 rx_super_edge (struct rx *rx,
1225                struct rx_superstate *super, rx_Bitset cset,
1226                struct rx_distinct_future *df) 
1227 #else
1228 static struct rx_super_edge *
1229 rx_super_edge (rx, super, cset, df)
1230      struct rx *rx;
1231      struct rx_superstate *super;
1232      rx_Bitset cset;
1233      struct rx_distinct_future *df;
1234 #endif
1235 {
1236   struct rx_super_edge *tc;
1237   int tc_size;
1238
1239   tc_size = (  sizeof (struct rx_super_edge)
1240              + rx_sizeof_bitset (rx->local_cset_size));
1241
1242   tc = ((struct rx_super_edge *)
1243         rx_cache_malloc (rx->cache, tc_size));
1244
1245   if (!tc)
1246     return 0;
1247
1248   tc->next = super->edges;
1249   super->edges = tc;
1250   tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
1251   tc->rx_backtrack_frame.data = 0;
1252   tc->rx_backtrack_frame.data_2 = (void *) tc;
1253   tc->options = df;
1254   tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc));
1255   rx_bitset_assign (rx->local_cset_size, tc->cset, cset);
1256   if (df)
1257     {
1258       struct rx_distinct_future * dfp = df;
1259       df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
1260       while (dfp)
1261         {
1262           dfp->edge = tc;
1263           dfp = dfp->next_same_super_edge[0];
1264         }
1265       df->next_same_super_edge[1]->next_same_super_edge[0] = df;
1266     }
1267   return tc;
1268 }
1269
1270
1271 /* There are three kinds of cache miss.  The first occurs when a
1272  * transition is taken that has never been computed during the
1273  * lifetime of the source superstate.  That cache miss is handled by
1274  * calling COMPUTE_SUPER_EDGE.  The second kind of cache miss
1275  * occurs when the destination superstate of a transition doesn't
1276  * exist.  SOLVE_DESTINATION is used to construct the destination superstate.
1277  * Finally, the third kind of cache miss occurs when the destination
1278  * superstate of a transition is in a `semi-free state'.  That case is
1279  * handled by UNFREE_SUPERSTATE.
1280  *
1281  * The function of HANDLE_CACHE_MISS is to figure out which of these
1282  * cases applies.
1283  */
1284
1285 #ifdef __STDC__
1286 static void
1287 install_partial_transition  (struct rx_superstate *super,
1288                              struct rx_inx *answer,
1289                              RX_subset set, int offset)
1290 #else
1291 static void
1292 install_partial_transition  (super, answer, set, offset)
1293      struct rx_superstate *super;
1294      struct rx_inx *answer;
1295      RX_subset set;
1296      int offset;
1297 #endif
1298 {
1299   int start = offset;
1300   int end = start + 32;
1301   RX_subset pos = 1;
1302   struct rx_inx * transitions = super->transitions;
1303   
1304   while (start < end)
1305     {
1306       if (set & pos)
1307         transitions[start] = *answer;
1308       pos <<= 1;
1309       ++start;
1310     }
1311 }
1312
1313
1314 #ifdef __STDC__
1315 struct rx_inx *
1316 rx_handle_cache_miss (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data) 
1317 #else
1318 struct rx_inx *
1319 rx_handle_cache_miss (rx, super, chr, data)
1320      struct rx *rx;
1321      struct rx_superstate *super;
1322      unsigned char chr;
1323      void *data;
1324 #endif
1325 {
1326   int offset = chr / RX_subset_bits;
1327   struct rx_distinct_future *df = data;
1328
1329   if (!df)                      /* must be the shared_cache_miss_frame */
1330     {
1331       /* Perhaps this is just a transition waiting to be filled. */
1332       struct rx_super_edge *tc;
1333       RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
1334
1335       for (tc = super->edges; tc; tc = tc->next)
1336         if (tc->cset[offset] & mask)
1337           {
1338             struct rx_inx * answer;
1339             df = tc->options;
1340             answer = ((tc->options->next_same_super_edge[0] != tc->options)
1341                       ? &tc->rx_backtrack_frame
1342                       : (df->effects
1343                          ? &df->side_effects_frame
1344                          : &df->future_frame));
1345             install_partial_transition (super, answer,
1346                                         tc->cset [offset], offset * 32);
1347             return answer;
1348           }
1349       /* Otherwise, it's a flushed or  newly encountered edge. */
1350       {
1351         char cset_space[1024];  /* this limit is far from unreasonable */
1352         rx_Bitset trcset;
1353         struct rx_inx *answer;
1354
1355         if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
1356           return 0;             /* If the arbitrary limit is hit, always fail */
1357                                 /* cleanly. */
1358         trcset = (rx_Bitset)cset_space;
1359         rx_lock_superstate (rx, super);
1360         if (!compute_super_edge (rx, &df, trcset, super, chr))
1361           {
1362             rx_unlock_superstate (rx, super);
1363             return 0;
1364           }
1365         if (!df)                /* We just computed the fail transition. */
1366           {
1367             static struct rx_inx
1368               shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
1369             answer = &shared_fail_frame;
1370           }
1371         else
1372           {
1373             tc = rx_super_edge (rx, super, trcset, df);
1374             if (!tc)
1375               {
1376                 rx_unlock_superstate (rx, super);
1377                 return 0;
1378               }
1379             answer = ((tc->options->next_same_super_edge[0] != tc->options)
1380                       ? &tc->rx_backtrack_frame
1381                       : (df->effects
1382                          ? &df->side_effects_frame
1383                          : &df->future_frame));
1384           }
1385         install_partial_transition (super, answer,
1386                                     trcset[offset], offset * 32);
1387         rx_unlock_superstate (rx, super);
1388         return answer;
1389       }
1390     }
1391   else if (df->future) /* A cache miss on an edge with a future? Must be
1392                         * a semi-free destination. */
1393     {                           
1394       if (df->future->is_semifree)
1395         refresh_semifree_superstate (rx->cache, df->future);
1396       return &df->future_frame;
1397     }
1398   else
1399     /* no future superstate on an existing edge */
1400     {
1401       rx_lock_superstate (rx, super);
1402       if (!solve_destination (rx, df))
1403         {
1404           rx_unlock_superstate (rx, super);
1405           return 0;
1406         }
1407       if (!df->effects
1408           && (df->edge->options->next_same_super_edge[0] == df->edge->options))
1409         install_partial_transition (super, &df->future_frame,
1410                                     df->edge->cset[offset], offset * 32);
1411       rx_unlock_superstate (rx, super);
1412       return &df->future_frame;
1413     }
1414 }
1415
1416