You are on page 1 of 430

The

Implementation of Icon and Unicon


a Compendium

Clinton Jeffery, editor

ii

The Implementation of Icon and Unicon

Ralph and Madge T. Griswold Kenneth W. Walker Clinton L. Jeffery

ii

Copyright 2013 Clinton Jeffery. Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation, with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license is included in the section entitled "GNU Free Documentation License". Portions of this document ("The Implementation of the Icon Programming Language") are in the public domain and not subject to the above copyright or license. Portions of this document ("An Optimizing Compiler for Icon") are copyrighted by Kenneth Walker and appear in edited form in this document with the express permission of the author.

This is a draft manuscript dated 9/20/2013. Send comments and errata to jeffery@cs.uidaho.edu.

This document was prepared using LibreOffice.org 3.4.

iii

Contents
Preface........xi Organization of This Book........xi Acknowledgements........xi Compendium Introduction........1 How Many Compilers?........1 Part I: The Implementation of the Icon Programming Language........3 Chapter 1: Introduction........5 1.1 Implementing Programming Languages........5 1.2 The Background for Icon........6 Chapter 2: Icon Language Overview........9 2.1 The Icon Programming Language........9 2.2 Language Features and the Implementation........31 EXERCISES........33 Chapter 3: Organization of the Implementation........35 3.1 The Icon Virtual Machine........35 3.2 Components of the Implementation........36 3.3 The
Translator........37 3.4 The Linker........38 3.4.1 Scope Resolution........38 3.4.2 Construction of Run-Time Structures........38 3.5 The Run-Time System........38 EXERCISES........39 Chapter 4: Values and Variables........41 4.1 Descriptors........42 4.1.1 Strings........43 4.1.2 The Null Value........43 4.1.3 Integers........43 4.3 Variables........45 4.3.1 Operations on Variables........46 4.3.2 Trapped Variables........47 4.4 Descriptors and Blocks in C........48 4.4.1 Descriptors........48 4.4.2
Blocks........49 4.4.3 Defined Constants........50 4.4.4 C Coding Conventions........51 EXERCISES........53 Chapter 5: Strings and Csets........54 5.1 Strings........54 5.1.1 Representation of Strings........54 5.1.2 Concatenation........55 5.1.3 Substrings........57 5.1.4 Assignment to Subscripted Strings........58 5.1.5 Mapping........59 5.2 Csets........61 EXERCISES........62 Chapter 6: Lists........64 6.1 Structures for Lists........64 6.2 Queue and Stack Access........67 6.3 Positional
Access........73

iv

Chapter 7: Sets and Tables........76 7.1 Sets........76 7.1.1 Data Organization for Sets........76 7.1.2 Set Operations........78 7.2 Tables........79 7.2.1 Data Organization for Tables........79 7.3 Hashing Functions........82 EXERCISES........84 Chapter 8: The Interpreter........87 8.1 Stack-Based Evaluation........87 8.2 Virtual Machine Instructions........88 8.2.1 Constants........88 8.2.2 Identifiers........89 8.3 Operators........96 8.2.4 Functions........97 8.3 The Interpreter Proper........98 8.3.1 The
Interpreter Loop........98 Chapter 9: Expression Evaluation........99 9.1 Bounded Expressions........99 9.1.1 Expression Frames........101 9.2 Failure........102 9.3 Generators and Goal-Directed Evaluation........105 9.4 Generative Control Structures........114 9.4.1 Alternation........114 9.4.2 Repeated Alternation........116 9.4.3 Limitation........116 9.5 Iteration........117 9.6 String Scanning........118 Chapter 10: Functions, Procedures, and Co-Expressions........123 10.1 Invocation Expressions........123 10.2 Procedure Blocks........124 10.3 Invocation........125 10.3.1 Argument
Processing........125 10.3.2 Function Invocation........127 10.3.3 Procedure Invocation........128 10.4 Co-Expressions........130 EXERCISES........135 Chapter 11: Storage Management........137 11.1 Memory Layout........138 11.2 Allocation........140 11.2.1 The Static Region........140 11.2.2 Blocks........141 11.2.3 Strings........141 11.3 Garbage Collection........142 11.3.1 The Basis........142 11.3.2 The Location Phase........143 11.3.3 Pointer Adjustment and Compaction........149 11.3.4 Collecting Co-Expression Blocks........156 11.3.5 Expansion of the Allocated Regions........156 11.3.6
Storage Requirements during Garbage Collection........157 11.4 Predictive Need........157

EXERCISES........160 Chapter 12: Run-Time Support Operations........164 12.1 Type Checking and Conversion........164 12.2 Dereferencing and Assignment........167 12.2.1 Dereferencing........168 12.2.2 Assignment........170 12.3 Input and Output........174 12.3.1 Files........175 12.3.2 Reading and Writing Data........176 12.4 Diagnostic Facilities........176 EXERCISES........177 Part II: An Optimizing Compiler for Icon........179 Preface to Part II........180 Chapter 13: The Optimizing Compiler........181 13.1 Motivation........181 13.2 Type Inferencing........181 13.3 Liveness Analysis
........183 13.4 Analyzing Goal-Directed Evaluation........183 Chapter 14: The Translation Model........185 14.1 Data Representation........185 14.2 Intermediate Results........186 14.3 Executable Code........187 Chapter 15: The Type Inferencing Model........193 15.1 Motivation........193 15.2 Abstract Interpretation ........194 15.3 Collecting Semantics........195 15.4 Model 1: Eliminating Control Flow Information........198 15.5 Model 2: Decoupling Variables........200 15.6 Model 3: A Finite Type System........202 Chapter 16: Liveness Analysis of Intermediate Values........205 16.1 Implicit Loops........205 16.2 Liveness Analysis........207 16.3 An Attribute Grammar........210 16.4 Primary
Expressions........211 16.5 Operations with Subexpressions........211 16.6 Control Structures........213 Chapter 17: Overview of the Compiler........216 17.1 Components of the Compiler........216 17.2 The Run-time System........216 17.3 The Implementation Language ........217 17.4 Standard and Tailored Operation Implementations........220 Chapter 18: Organization of Iconc........221 18.1 Compiler Phases........221 18.2 Naive Optimizations........222 18.3 Code Generation for Procedures........223 Chapter 19: The Implementation of Type Inferencing........225 19.1 The Representation of Types and Stores........225 19.2 A Full Type System........226 19.3 Procedure Calls and Co-Expression Activations........230 19.4 The Flow Graph and Type Computations........231 Chapter 20: Code
Generation........235

vi

20.1 Translating Icon Expressions........237 20.2 Signal Handling........240 20.3 Temporary Variable Allocation........242 Chapter 21: Control Flow Optimizations........248 21.1 Naive Code Generation........248 21.2 Success Continuations........248 21.3 Iconc's Peephole Optimizer........250 Chapter 22: Optimizing Invocations........253 22.1 Invocation of Procedures........253 22.2 Invocation and In-lining of Built-in Operations........253 22.3 Heuristic for Deciding to In-line........255 22.4 In-lining Success Continuations........256 22.5 Parameter Passing Optimizations........257 22.6 Assignment Optimizations........259 Chapter 23: Performance of Compiled Code........262 23.1 Expression Optimizations........262 23.2 Program Execution Speed........264 23.3 Code
Size........265 Chapter 24: Future Work on the Compiler........267 24.1 Summary........267 24.2 Future Work........267 Chapter 25: Optimizing the Icon Compiler........271 25.1 Introduction........271 Areas Where Iconc Can Be Improved........271 Changes to the Compiler Source........272 25.2 Optimizing the Type Representation........273 New Type Representation........274 How Type Allocation Works........275 Reorganizing the Code........276 New Functions........276 Other Changes........277 Results of Type Optimization........278 25.3 Optimizing the Generated Code........278 Intermediate Code Representation........278 Redundant
Function Calls........281 Icon Literals and Constant Propagation........282 New Functions........286 Variable Initialization........287 Loop Unrolling........287 Results of Code Generation Optimizations........288 25.4 Results........288 Type Representation........289 Code Generation........290 Analysis of Intermediate Code Optimizations........292 Future Optimizations........293 Part III: The Implementation of Unicon........297 Chapter 26: The Unicon Translator........299 26.1 Overview........299 26.2 Lexical Analysis........299 26.3 The Unicon Parser........305 Syntax
Error Handling ........306

vii

26.4 The Unicon Preprocessor........306 26.5 Semantic Analysis........308 26.6 Object Oriented Facilities ........312 Implementing Multiple Inheritance in Unicon ........315 Unicon's Progend() revisited ........317 Other OOP Issues........318 An Aside on Public Interfaces and Runtime Type Checking ........318 Chapter 27: Portable 2D and 3D Graphics........320 27.1 Window Systems and Platform-Independence........320 27.2 Structures Defined in graphics.h........321 27.3 Platform Macros and Coding Conventions........322 27.4 Window Manipulation in rxwin.ri and rmswin.ri........323 Window Creation and Destruction........323 Event Processing........324 Resource Management........324 Memory Management and r*rsc.ri Files........325 Color Management........325 Font
Management........325 27.6 External Image Files and Formats........325 27.7 Implementation of 3D Facilities........325 3D Facilities Requirements ........326 Files........326 Redrawing Windows........326 Textures........326 Texture Coordinates........327 27.8 Graphics Facilities Porting Reference........328 26.9 The X Implementation........337 26.10 The MS Windows Implementation........337 Installing, Configuring, and Compiling the Source Code........338 Chapter 28: Networking, Messaging and the POSIX Interface........340 28.1 Networking Facilities........340 28.2 Messaging Facilities........340 The Transfer Protocol Library........340 Libtp Architecture........340 The
Discipline........340 Exception Handling........341 Part IV: Appendixes........344 Appendix A: Data Structures........346 A.1 Descriptors........346 A.1.1 Values........346 A.1.2 Variables........347 A.2 Blocks........347 A.2.1 Long Integers........347 A.2.2 Real Numbers........347 A.2.3 Csets........347 A.2.4 Lists........348 A.2.5 Sets........349 A.2.6 Tables........350 A.2.7 Procedures........350 A.2.8 Files........352 A.2.9
Trapped Variables........352

viii

A.2.10 Co-Expressions ........ 353 Appendix B: Virtual Machine Instructions ........ 357 Appendix C: Virtual Machine Code ........ 361 C.1 Identifiers ........ 361 C.2 Literals ........ 361 C.3 Keywords ........ 362 C.4 Operators ........ 362 C.5 Calls ........ 364 C.6 Compound Expressions and Conjunction ........ 364 C.7 Selection Expressions ........ 365 C.8 Negation ........ 366 C.9 Generative Control Structures ........ 366 C.10 Loops ........ 368 C.11 String Scanning ........ 369 C.12 Procedure Returns ........ 370 C.13 Co-Expression Creation ........ 370 Appendix 
D: Adding Functions and Data Types ........ 372 D.1 File Organization ........ 372 D.2 Adding Functions ........ 372 D.2.1 Function Declarations ........ 373 D.2.2 Returning from a Function ........ 373 D.2.3 Type Checking and Conversion ........ 375 D.2.4 Constructing New Descriptors ........ 376 D.2.5 Default Values ........ 376 D.2.6 Storage Allocation ........ 377 D.2.7 Storage Management Considerations ........ 378 D.2.8 Error Termination ........ 378 D.2.9 Header Files ........ 379 D.2.10 Installing a New Function ........ 379 D.3 Adding Data Types ........ 379 D.3.1 Type Codes ........ 380 D.3.2 Structures ........ 380 D.3.3 Information Needed for Storage Management ........ 380 D.3.4 Changes to Existing 
Code ........ 381 D.4.1 Defined Constants ........ 383 D.4.2 Macros ........ 383 D.5 Support Routines ........ 384 D.5.1 Comparison ........ 384 Appendix E: Projects ........ 386 Appendix F: Solutions to Selected Exercises ........ 387 Appendix G: The RTL Run-Time Language ........ 388 G.1 Operation Documentation ........ 388 G.2 Types of Operations ........ 388 G.3 Declare Clause ........ 390 G.4 Actions ........ 391 Type Checking and Conversions ........ 391 Scope of Conversions ........ 394 Type Names ........ 395 Including C Code ........ 396 Error 
Reporting ........ 396 Abstract Type Computations ........ 396

ix

C Extensions ........ 400 Interface Variables ........ 400 Declarations ........ 401 Type Conversions/Type Checks ........ 402 Signaling Run-time Errors ........ 402 Return Statements ........ 402 GNU Free Documentation License ........ 404 References ........ 407 Index ........ 411

xi

Preface
This book is a compendium of all documents that describe the implementation of the Icon and Unicon programming languages, an implementation that started with Icon version 3 on a PDP-11 sometime near the year 1980.

Organization of This Book


This book consists of four parts. The first part, Chapters 1-12, presents the core of the implementation, focusing on the Icon virtual machine interpreter and runtime system. This material was formerly published as the Implementation of the Icon Programming Language, by Ralph and Madge T. Griswold; at that time it documented Icon Version 6. Many of the details in this book became obsolete with the rewriting of the runtime system for Icon Version 8. After sleeping for about nine years on the question of whether to preserve the original text as-is and annotate it incredibly heavily in side-margins, I have elected to preserve the authors' style and intent, while updating it to document Icon Version 9.5 and Unicon Version 12. I use blue-colored text when necessary to discuss Unicon issues and differences, so that Part I should remain useful to people who prefer to use the Icon implementation, not just those working with Unicon. Part II, in Chapters 13-19, describes the optimizing compiler, iconc, and the structuring of the runtime system to support it. This work is the brainchild of Ken Walker, whose dissertation is presented here, along with his technical reports describing the runtime language RTL and its translator, rtt. Ken Walker's compiler has been enhanced significantly by two persons: Anthony Jones did a B.S. Honors thesis at UTSA on space reduction techniques that reduce the space cost of type inferencing by 2/3rds, while Mike Wilder did an M.S. at NMSU and some follow-on work at Idaho on adapting iconc to work with Unicon and vice versa. These contributions belong logically to Part II. Part III describes the implementation of Unicon and the many extensions that transformed the language from a string-and-list-processing language into a modern object-oriented, network-savvy, graphics-rich applications language. Part IV consists of essential reference material presented in several Appendixes.

Acknowledgments
This book would not be possible without the generous contributions and consent of the primary authors of the Icon language implementation documents, Ralph and Madge Griswold, and Kenneth Walker. Ralph Griswold re-scanned and corrected his Icon implementation book manuscript in order to place it in the public domain on the web, a large, selfless, thankless, and valuable undertaking. Ken Walker found and shared his original nroff dissertation source files. Susie Jeffery provided crucial assistance in the OCR reconstruction of the Icon implementation book manuscript from the public domain scanned images. Mike Kemp was a valuable volunteer proofreader in that effort. Responsibility for remaining typographical errors rests with me.

xii

Thanks to the rest of the people who contributed code to the Icon Project over a period of many years and many Ph.D. and M.S. degrees. The editor wishes to acknowledge generous support from the National Library of Medicine. This work was also supported in part by the National Science Foundation under grants CDA-9633299, EIA-0220590 and EIA-9810732, and the Alliance for Minority Participation. Clinton Jeffery, Moscow ID, September 2013 Acknowledgments for Chapters 1-12 The implementation of Icon described in Part I owes much to previous work and in particular to implementations of earlier versions of Icon. Major contributions were made by Cary Coutant, Dave Hanson, Tim Korb, Bill Mitchell, and Steve Wampler. Walt Hansen, Rob McConeghy, and Janalee O'Bagy also made significant contributions to this work. The present system has benefited greatly from persons who have installed Icon on a variety of machines and operating systems. Rick Fonorow, Bob Goldberg, Chris Janton, Mark Langley, Rob McConeghy, Bill Mitchell, Janalee O'Bagy, John Polstra, Gregg Townsend, and Cheyenne Wills have made substantial contributions in this area. The support of the National Science Foundation under Grants MCS75-01397, MCS79-03890, MCS81-01916, DCR-8320138, DCR-8401831, and DCR-8502015 was instrumental in the original conception of Icon and has been invaluable in its subsequent development. A number of persons contributed to this book. Dave Gudeman, Dave Hanson, Bill Mitchell, Janalee O'Bagy, Gregg Townsend, and Alan Wendt contributed to the exercises that appear at the ends of chapters and the projects given in Appendix 
E. Kathy Cummings, Bill Griswold, Bill Mitchell, Katie Morse, Mil Tharp, and Gregg Townsend gave the manuscript careful readings and made numerous suggestions. Janalee O'Bagy not only read the manuscript but also supplied concepts for presenting and writing the material on expression evaluation. Finally, Dave Hanson served as an enthusiastic series editor for this book. His perceptive reading of the manuscript and his supportive and constructive suggestions made a significant contribution to the final result. Acknowledgments for Chapters 13-24 I would like to thank Ralph Griswold for acting as my research advisor. He provided the balance of guidance, support, and freedom needed for me to complete this research. From him I learned many of the technical writing skills I needed to compose this dissertation. I am indebted to him and the other members of the Icon Project who over the years have contributed to the Icon programming language that serves as a foundation of this research. I would like to thank Peter Downey and Saumya Debray for also serving as members on my committee and for providing insightful criticisms and suggestions for this dissertation. In addition, Saumya Debray shared with me his knowledge of abstract interpretation, giving me the tool I needed to shape the final form of the type inferencing system. I have received help from a number of my fellow graduate students both while they were still students and from some after they graduated. Clinton Jeffery, Nick Kline, and Peter Bigot proofread this dissertation, providing helpful comments. Similarly, Janalee O'Bagy,

xiii

Kelvin Nilsen, and David Gudeman proofread earlier reports that served as a basis for several of the chapters in this dissertation. Janalee O'Bagy's own work on compiling Icon provided a foundation for the compiler I developed. Kelvin Nilsen applied my liveness analysis techniques to a slightly different implementation model, providing insight into dependencies on execution models.

Compendium Introduction
The implementation of the Icon programming language is now old. It inherits ideas from earlier languages, and introduces many of its own. The implementation documentation traditionally revolved around the virtual machine and its runtime system; other parts of the implementation were documented in scattered technical reports or not at all, other than the source code. This volume changes all that, by bringing all the implementation documents together in a single volume. Icon's public-domain implementation is fairly efficient; for example, at one point Keith Waclena of the University of Chicago documented a factor of 4 or more speed advantage of Icon versus Python on multiple benchmarks, and that was for the Icon virtual machine interpreter; the Icon optimizing compiler adds another factor of 2-5 or more in faster execution speed. The design decisions that achieve Icon's very-high level language features (such as generators and goal-directed evaluation) with acceptable performance make for an interesting study. This book is intended for those wanting to learn the implementation in order to add features, improve performance, learn about compilers in general, or glean ideas for their own independent programming language efforts. Icon traditionally consisted of a virtual machine translator, a linker, and a virtual machine interpreter. This early organization was later supplemented with additional components, while some existing components were merged. The big added components are the optimizing compiler written by Ken Walker, and the Unicon translator written by Clint Jeffery. These added components now constitute more than half of the story. The trends I hope to see in the future are: merger of components, and gradual replacement of C-based components with ones written in Unicon.

How Many Compilers?


The figure below shows two symmetrically-organized sets of tools. The tools on the left are the compilers end-users employ to translate Icon or Unicon into executable machine code, while the tools on the right show how the underlying run-time system needed in order to execute those programs is built. Of the six rectangles, four are compilers that perform distinct tasks specific to this programming language family. The front-end translation tool, named unicon, is a preprocessor that translates Unicon code into Icon code. Its primary functions are to translate object-orientation (classes, single and multiple inheritance, and packages) down to underlying imperative constructs. Unicon is written in Unicon. Icont and iconc compile Icon code down to virtual machine and C code, respectively. They share a few common front-end components, but are largely independent. Iconx is the name of the Icon (and Unicon) virtual machine, which mostly consists of a large collection of complex high-level data structure and I/O facilities which are built-in to these languages. Most of the source code for iconx is also used in rt.a, the runtime library that is linked to Icon programs compiled with iconc. icont unicon iconc rtt rt.a iconx

Figure CI-1: three compilers for users (left), one (rtt) for the language implementors

Part I: The Implementation of the Icon Programming Language


by Ralph Griswold and Madge Griswold; discussion updated to Icon 9.5 source code by Clint Jeffery

Chapter 1: Introduction
PERSPECTIVE: The implementation of complex software systems is a fascinating subject—and an important one. Its theoretical and practical aspects occupy the attention and energy of many persons, and it consumes vast amounts of computational resources. In general terms, it is a broad subject ranging from operating systems to programming languages to database systems to real-time control systems, and so on. Past work in these areas has resulted in an increasingly better understanding of implementation techniques, more sophisticated and efficient systems, and tools for automating various aspects of software production. Despite these advances, the implementation of complex software systems remains challenging and exciting. The problems are difficult, and every advance in the state of the art brings new and more difficult problems within reach. Part I of this book addresses a very small portion of the problem of implementing complex software systems—the implementation of a very high-level programming language that is oriented toward the manipulation of structures and strings of characters. In a narrow sense, this book describes an implementation of a specific programming language, Icon. In a broader sense, it deals with a language-design philosophy, an approach to implementation, and techniques that apply to the implementation of many programming languages as well as related types of software systems. The focus of this book is the implementation of programming language features that are at a high conceptual level—features that are easy for human beings to use as opposed to features that fit comfortably on conventional computer architectures. The orientation of the implementation is generality and flexibility, rather than maximum efficiency of execution. The problem domain is strings and structures rather than numbers. It is these aspects that set the implementation of Icon apart from more conventional programming-language implementations.

1.1 Implementing Programming Languages


In conventional programming languages, most of the operations that are performed when a program is executed can be determined, statically, by examining the text of the program. In addition, the operations of most programming languages have a fairly close correspondence to the architectural characteristics of the computers on which they are implemented. When these conditions are met, source-code constructs can be mapped directly into machine instructions for the computer on which they are to be executed. The term compilation is used for this translation process, and most persons think of the implementation of a programming language in terms of a compiler. Writing a compiler is a complex and difficult task that requires specialized training, and the subject of compilation has been studied extensively (Waite and Goos 1984; Aho, Lam, Sethi, and Ullman 2006). Most of the issues of data representation and code generation are comparatively well understood, and there are now many tools for automating portions of the compiler-writing task (Lesk 1975; Johnson 1975). In addition to the compiler proper, an implementation of a programming language usually includes a run-time component that contains subroutines for performing computations that are too complex to compile in-line, such as input, output, and mathematical functions.

Some programming languages have features whose meanings cannot be determined statically from the text of a source-language program, but which may change during program execution. Such features include changes in the meaning of functions during execution, the creation of new data types at run time, and self-modifying programs. Some programming languages also have features, such as pattern matching, that do not have correspondences in the architecture of conventional computers. In such cases, a compiler cannot translate the source program directly into executable code. Very high-level operations, such as pattern matching, and features like automatic storage management significantly increase the importance and complexity of the run-time system. For languages with these characteristics—languages such as APL, LISP, SNOBOL4, SETL, Prolog, and Icon—much of the substance of the implementation is in the run-time system rather than in translation done by a compiler. While compiler writing is relatively well understood, run-time systems for most programming languages with dynamic features and very high-level operations are not. Programming languages with dynamic aspects and novel features are likely to become more important rather than less important. Different problems benefit from different linguistic mechanisms. New applications place different values on speed of execution, memory requirements, quick solutions, programmer time and talent, and so forth. For these reasons, programming languages continue to proliferate. New programming languages, by their nature, introduce new features. All of this creates difficulties for the implementer. Less of the effort involved in implementations for new languages lies in the comparatively familiar domain of compilation and more lies in new and unexplored areas, such as pattern matching and novel expression-evaluation mechanisms. The programming languages that are the most challenging to implement are also those that differ most from each 
other. Nevertheless, there are underlying principles and techniques that are generally applicable, and existing implementations contain many ideas that can be used or extended in new implementations.

1.2 The Background for Icon


Before describing the Icon programming language and its implementation, some historical context is needed, since both the language and its implementation are strongly influenced by earlier work. Icon has its roots in a series of programming languages that bear the name SNOBOL. The first SNOBOL language was conceived and implemented in the early 1960s at Bell Telephone Laboratories in response to the need for a programming tool for manipulating strings of characters at a high conceptual level (Farber, Griswold, and Polonsky 1964). It emphasized ease of programming at the expense of efficiency of execution; the programmer was considered to be a more valuable resource than the computer. This rather primitive language proved to be popular, and it was followed by successively more sophisticated languages: SNOBOL2, SNOBOL3 (Farber, Griswold, and Polonsky 1966), and finally SNOBOL4 (Griswold, Poage, and Polonsky 1971). Throughout the development of these languages, the design emphasis was on ease of programming rather than on ease of implementation (Griswold 1981). Potentially valuable features were not discarded because they might be inefficient or difficult to implement. The aggressive pursuit of this philosophy led to unusual language features and to challenging implementation problems.

SNOBOL4 still is in wide use. Considering its early origins, some of its facilities are remarkably advanced. It features a pattern-matching facility with backtracking control structures that effectively constitutes a sublanguage. SNOBOL4 also has a variety of data structures, including tables with associative lookup. Functions and operators can be defined and redefined during program execution. Identifiers can be created at run-time, and a program can even modify itself by means of run-time compilation. Needless to say, SNOBOL4 is a difficult language to implement, and most of the conventional compilation techniques have little applicability to it. Its initial implementation was, nonetheless, sufficiently successful to make SNOBOL4 widely available on machines ranging from large mainframes to personal computers (Griswold 1972). Subsequent implementations introduced a variety of clever techniques and fast, compact implementations (Santos 1971; Gimpel 1972a; Dewar and McCann 1977). The lesson here is that the design of programming languages should not be overly inhibited by perceived implementation problems, since new implementation techniques often can be devised to solve such problems effectively and efficiently. It is worth noting that the original implementation of SNOBOL4 was carried out concomitantly with language design. The implementation was sufficiently flexible to serve as a research tool in which experimental language features could be incorporated easily and tested before they were given a permanent place in the language. Work on the SNOBOL languages continued at the University of Arizona in the early 1970s. In 1975, a new language, called SL5 ("SNOBOL Language 5"), was developed to allow experimentation with a wider variety of programming-language constructs, especially a sophisticated procedure mechanism (Griswold and Hanson 1977; Hanson and Griswold 1978). SL5 extended earlier work in pattern matching, but pattern matching remained essentially a 
sublanguage with its own control structures, separate from the rest of the language. The inspiration for Icon came in 1976 with a realization that the control structures that were so useful in pattern matching could be integrated with conventional computational control structures to yield a more coherent and powerful programming language. The first implementation of Icon (Griswold and Hanson 1979) was written in Ratfor, a preprocessor for Fortran that supports structured programming features (Kernighan 1975). Portability was a central concern in this implementation. The implementation of Icon described in this book is a successor to that first implementation. It borrows much from earlier implementations of SNOBOL4, SL5, and the Ratfor implementation of Icon. As such, it is a distillation and refinement of implementation techniques that have been developed over a period of more than twenty years.

"

Chapter 2: Icon Language Overview


PERSPECTIVE: The implementer of a programming language needs a considerably different understanding of the language from the persons who are going to use it. An implementer must have a deep understanding of the relationships that exist among various aspects of the language and a precise knowledge of what each operation means. Special cases and details often are of particular importance to the implementer. Users of a language, on the other hand, must know how to use features to accomplish desired results. They often can get by with a superficial knowledge of the language, and they often can use it effectively even if some aspects of the language are misunderstood. Users can ignore parts of the language that they do not need. Idiosyncrasies that plague the implementer may never be encountered by users. Conversely, a detail the implementer overlooks may bedevil users. Furthermore, the implementer may also need to anticipate ways in which users may apply some language features in inefficient and inappropriate ways. Part I of this book is about the implementation of Version 9 of Icon. The description that follows concentrates on aspects of the language that are needed to understand its implementation. Where there are several similar operations or where the operations are similar to those in well-known programming languages, only representative cases or highlights are given. A complete description of Icon for the user is contained in Griswold and Griswold (1997). Icon is an unusual programming language, and its unusual features are what make its implementation challenging and interesting. The interesting features are semantic, not syntactic; they are part of what the language can do, not part of its appearance. Syntactic matters and the way they are handled in the implementation are of little interest here. The description that follows indicates syntax 
mostly by example. This chapter is divided into two major parts. The first part describes the essential aspects of Icon. The second part discusses those aspects of Icon that present the most difficult implementation problems and that affect the nature of the implementation in the most significant ways.

2.1 The Icon Programming Language


Icon is conventional in many respects. It is an imperative, procedural language with variables, operations, functions, and conventional data types. Its novel aspects lie in its emphasis on the manipulation of strings and structures and in its expression-evaluation mechanism. While much of the execution of an Icon program has an imperative flavor, there also are aspects of logic programming. There are no type declarations in Icon. Instead, variables can have any type of value. Structures may be heterogeneous, with different elements having values of different types. Type checking is performed during program execution, and automatic type conversion is provided. Several operations are polymorphic, performing different operations depending on the types of their arguments. Strings and structures are created during program execution, instead of being declared and allocated during compilation. Structures have pointer semantics; a structure value is a pointer to an object. Storage management is automatic. Memory is allocated as required, and garbage collection is performed when necessary. Except for the practical

10

considerations of computer architecture and the amount of available memory, there are no limitations on the sizes of objects. An Icon program consists of a series of declarations for procedures, records, and global identifiers. Icon has no block structure. Scoping is static: identifiers either are global or are local to procedures. Icon is an expression-based language with reserved-word syntax. It resembles C in appearance, for example (Kernighan and Ritchie 1978). 2.1.1 Data Types Icon has many types of data—including several that are not found in most programming languages. In addition to the usual integers and real (floating-point) numbers, there are strings of characters and sets of characters (csets). There is no character data type, and strings of characters are data objects in their own right, not arrays of characters. There are four structure data types that comprise aggregates of values: lists, sets, tables, and records. Lists provide positional access (like vectors), but they also can be manipulated like stacks and queues. Sets are unordered collections of values on which the usual set operations can be performed. Tables can be subscripted with any kind of value and provide an associative-access mechanism. Records are aggregates of values that can be referenced by name. Record types also add to the built-in type repertoire of Icon. The null value serves a special purpose; all variables have the null value initially. The null value is illegal in most computational contexts, but it serves to indicate default values in a number of situations. The keyword &null produces the null value. A source-language file is a data value that provides an interface between the program and a data file in the environment in which the program executes. Procedures also are data values—"first-class data objects" in LISP parlance. Procedures can be assigned to variables, transmitted to and returned from functions, and so forth. There is no method for creating procedures during 
program execution, however. Finally, there is a co-expression data type. Co-expressions are the expression-level analog of coroutines. The importance of co-expressions is derived from Icon's expression-evaluation mechanism. Icon has various operations on different types of data. Some operations are polymorphic and accept arguments of different types. For example, type(x) produces a string corresponding to the type of x. Similarly, copy(x) produces a copy of x, regardless of its type. Other operations only apply to certain types. An example is:
*x

which produces the size of x, where the value of x may be a string, a structure, and so on. Another example is ?x, which produces a randomly selected integer between 1 and x, if x is an integer, but a randomly selected one-character substring of x if x is a string, and so on. In other cases, different operations for similar kinds of computations are syntactically distinguished. For example,
i = j

compares the numeric values of i and j, while


s1 == s2

11

compares the string values of s1 and s2. There is also a general comparison operation that determines whether any two objects are the same:
x1 === x2

As mentioned previously, any kind of value can be assigned to any variable. For example, x might have an integer value at one time and a string value at another:
x := 3 ...

x := "hello" Type checking is performed during program execution. For example, in
i := x + 1

the value of x is checked to be sure that it is numeric. If it is not numeric, an attempt is made to convert it to a numeric type. If the conversion cannot be performed, program execution is terminated with an error message. Various conversions are supported. For example, a number always can be converted to a string. Thus,
write(*s)

automatically converts the integer returned by *s to a string for the purpose of output. There also are explicit type-conversion functions. For example,
s1 := string(*s2)

assigns to s1 a string corresponding to the size of s2. A string can be converted to a number if it has the syntax of a number. Thus,
i := i + "20"

produces the same result as


i := i + 20

Augmented assignments are provided for binary operations such as the previous one, where assignment is made to the same variable that appears as the left argument of the operation. Therefore, the previous expression can be written more concisely as
i +:= 20

Icon also has the concept of a numeric type, which can be either an integer or a real (floating-point) number. 2.1.2 Expression Evaluation In most programming languages---Algol, Pascal, PL/I, and C, for example---the evaluation of an expression always produces exactly one result. In Icon, the evaluation of an expression may produce a single result, it may produce no result at all, or it may produce a sequence of results. Success and Failure. Conventional operations in Icon produce one result, as they do in most programming languages. For example,
i + j

produces a single result, the sum of the values of i and j. However, a comparison operation such as
i > j

12

produces a result (the value of j) if the value of i is greater than the value of j but does not produce a result if the value of i is not greater than j. An expression that does not produce a result is said to fail, while an expression that produces a result is said to succeed. Success and failure are used in several control structures to control program flow. For example,
if i > j then write(i) else write(j)

writes the maximum of i and j. Note that comparison operations do not produce Boolean values and that Boolean values are not used to drive control structures. Indeed, Icon has no Boolean type. Generally speaking, an operation that cannot perform a computation does not produce a result, and hence it fails. For example, type-conversion functions fail if the conversion cannot be performed. An example is numeric(x), which converts x to a numeric value if possible, but fails if the conversion cannot be performed. Failure of an expression to produce a result does not indicate an error. Instead, failure indicates that a result does not exist. An example is provided by the function find(s1, s2), which produces the position of s1 as a substring of s2 but fails if s1 does not occur in s2. For example,
find("it", "They sit like bumps on a log.")

produces the value 7 (positions in strings are counted starting at 1). However,
find("at", "They sit like bumps on a log.")

does not produce a result. Similarly, read(f) produces the next line from the file f but fails when the end of the file is reached. Failure provides a natural way to control loops. For example,
while line := read(f) do write(line)

writes the lines from the file f until an end of file causes read to fail, which terminates the loop. Another use of success and failure is illustrated by the operation
\expr

which fails if expr is null-valued but produces the result of expr otherwise. Since variables have the null value initially, this operation may be used to determine whether a value has been assigned to an identifier, as in
if \x then write(x) else write("x is null")

If an expression that is enclosed in another expression does not produce a result, there is no value for the enclosing expression, it cannot perform a computation, and it also produces no result. For example, in
write(find("at", "They sit like bumps on a log."))

the evaluation of find fails, there is no argument for write, and no value is written. Similarly, in
i := find("at", "They sit like bumps on a log.")

the assignment is not performed and the value of i is not changed. This "inheritance" of failure allows computations to be expressed concisely. For example,
while write(read(f))

13

writes the lines from the file f just as the previous loop (the do clause in while-do is optional). The expression
not expr

inverts success and failure. It fails if expr succeeds, but it succeeds, producing the null value, if expr fails. Some expressions produce variables, while others only produce values. For example,
i + j

produces a value, while


i := 10

produces its left-argument variable. The term result is used to refer to a value or a variable. The term outcome is used to refer to the consequences of evaluating an expression---either its result or failure. Loops. There are several looping control structures in Icon in addition to while-do. For example,
until expr1 do expr2

evaluates expr2 repeatedly until expr1 succeeds. The control structure
repeat expr

simply evaluates expr repeatedly, regardless of whether it succeeds or fails. A loop itself produces no result if it completes, and hence it fails if used in a conditional context. That is, when
while expr1 do expr2

terminates, its outcome is failure. This failure ordinarily goes unnoticed, since loops usually are not used as arguments of other expressions. The control structure
break expr

causes the immediate termination of the evaluation of the loop in which it appears, and control is transferred to the point immediately after the loop. The outcome of the loop in this case is the outcome of expr. If expr is omitted, it defaults to the null value. An example of the use of break is:
while line := read(f) do if line == "end" then break else write(line)

Evaluation of the loop terminates if read fails or if the file f contains a line consisting of "end". The expression next causes transfer to the beginning of the loop in which it occurs. For example,
while line := read(f) do if line == "comment" then next else write(line)

does not write the lines of f that consist of "comment".

14

The break and next expressions can occur only in loops, and they apply to the innermost loop in which they appear. The argument of break can be a break or next expression, however, so that, for example,
break break next

breaks out of two levels of loops and transfers control to the beginning of the loop in which they occur. Case Expressions. The case expression provides a way of selecting one of several expressions to evaluate based on the value of a control expression, rather than its success or failure. The case expression has the form
case expr of { case clauses ... }

The )al e of expr is sed to select one of the case cla ses( 1 case cla se has the form
expr1 : expr2

+here the )al e of expr is compared to the )al e of expr1, and expr2 is e)al ated if the comparison s cceeds( There is also a defa lt case cla se, +hich has the form
default: expr3

If no other case cla se is selected, e.pr3 in the defa lt cla se is e)al ated( 1n e.ample is
case line := read(f) of { "end": write("*** end ***") "comment": write("*** comment ***") default: write(line) }

If the e)al ation of the control cla se fails, as for an end of file in this e.ample, the entire case e.pression fails( 6ther+ise, the o tcome of the case e.pression is the o tcome of e)al ating the selected e.pression( Generators( 1s mentioned pre)io sly, an e.pression may prod ce a seI ence of res lts( This occ rs in sit ations in +hich there is more than one possible res lt of a comp tation( 1n e.ample is
find("e", "They sit like bumps on a log.")

in +hich both 3 and 13 are possible res lts( 9hile most programming lang ages prod ce only the first res lt in s ch a sit ation, in Icon the t+o res lts are prod ced one after another if the s rro nding conte.t reI ires both of them( * ch e.pressions are called generators to emphasi7e their capability of prod cing more than one res lt( There are t+o conte.ts in +hich a generator can prod ce more than one res lt@ iteration and goal-directed evaluation( Iteration is designated by the control str ct re
every expr1 do expr2

in +hich e.pr1 is repeatedly res med to prod ce its res lts( $or each s ch res lt, e.pr2 is e)al ated( $or e.ample,
every i := find("e", "They sit like bumps on a log.") do write(i)

15

writes 3 and 13. If the argument of an expression is a generator, the results produced by the generator are provided to the enclosing expression---the sequence of results is inherited. Consequently, the previous expression can be written more compactly as
every write(find("e", "They sit like bumps on a log."))

Unlike iteration, which resumes a generator repeatedly to produce all its results, goal-directed evaluation resumes a generator only as necessary, in an attempt to cause an enclosing expression to succeed. While iteration is explicit and occurs only where specified, goal-directed evaluation is implicit and is an inherent aspect of Icon's expression-evaluation mechanism. Goal-directed evaluation is illustrated by
if find("e", "They sit like bumps on a log") > 10 then write("found")

The first result produced by find is 3, and the comparison operation fails. Because of goal-directed evaluation, find is automatically resumed to produce another value. Since this value, 13, is greater than 10, the comparison succeeds, and found is written. On the other hand, in
if find("e", "They sit like bumps on a log.") > 20 then write("found")

the comparison fails for 3 and 13( 9hen find is res med again, it does not prod ce another res lt, the control cla se of if-then fails, and nothing is +ritten( There are se)eral e.pressions in Icon that are generators, incl ding string analysis f nctions that are similar in nat re to find( 1nother generator is
i to j by k

which generates the integers from i to j by increments of k. If the by clause is omitted, the increment defaults to one. The operation !x is polymorphic, generating the elements of x for various types. The meaning of "element" depends on the type of x. If x is a string, !x generates the one-character substrings of x, so that !"hello" generates "h", "e", "l", "l", and "o". If x is a file, !x generates the lines of the file, and so on. Generative Control Structures. There are several control structures related to generators. The alternation control structure,
expr1 | expr2

generates the res lts of e.pr1 follo+ed by the res lts of e.pr2( $or e.ample,
every write("hello" | "howdy")

+rites t+o lines, hello and ho+dy( *ince alternation s cceeds if either of its arg ments s cceeds, it can be sed to prod ce the effect of logical dis5 nction( 1n e.ample is
if (i > j) | (j > k) then expr

+hich e)al ates e.pr if i is greater than 5 or if 5 is greater than 0( &ogical con5 nction follo+s as a nat ral conseI ence of goal-directed e)al ation( The operation
expr1 & expr2

is similar to other binary operations, such as expr1 + expr2, except that it performs no computation. Instead, it produces the result of expr2, provided that both expr1 and expr2 succeed. For example,
if (i > j) & (j > k) then expr

e)al ates expr only if i is greater than 5 and 5 is greater than 0( Eepeated alternation,
|expr

generates the res lts of expr repeatedly and is ro ghly eI i)alent to


expr | expr | expr | ...

=o+e)er, if expr fails, the repeated alternation control str ct re stops generating res lts( $or e.ample,
|read(f)

generates the lines from the file f 3one line for each repetition of the alternation4 b t stops +hen read3f4 fails( #ote that a generator may be capable of prod cing an infinite n mber of res lts( $or e.ample,
|(1 to 3)

can produce 1, 2, 3, 1, 2, 3, 1, 2, 3, ... However, only as many results as are required by context are actually produced. Thus,
i := | (1 to 3)

only assigns the )al e 1 to i, since there is no conte.t to ca se the repeated alternation control str ct re to be res med for a second res lt( The limitation control str ct re
expr1 \ expr2

limits expr1 to at most expr2 res lts( ConseI ently,


| (1 to 3) \ 5

is only capable of producing 1, 2, 3, 1, 2. The Order of Evaluation. With the exception of the limitation control structure, argument evaluation in Icon is strictly left-to-right. The resumption of expressions to produce additional results is in last-in, first-out order. The result is "cross-product" generation of results in expressions that contain several generators. For example,
every write((10 to 30 by 10) + (1 to 3))

+rites 11, 12, 13, 21, 22, 23, 31, 32, 33( Control (acktracking. "oal-directed e)al ation res lts in control bac0trac0ing to obtain additional res lts from e.pressions that ha)e pre)io sly prod ced res lts, as in
if find("e", "They sit like bumps on a log.") > 10 then write("found")

Control bac0trac0ing is limited by a n mber of syntactic constr ctions( $or e.ample, in


if expr1 then expr2 else expr3

if expr1 s cceeds, b t expr2 fails, expr1 is not res med for another res lt( 3If it +ere, the semantics of this control str ct re +o ld not correspond to +hat 2if-then-else2 s ggests(4

17

* ch an e.pression is called a bounded expression. The control cla ses of loops also are bo nded, as are the e.pressions +ithin compo nd e.pressions@
{ expr1; expr2; expr3; ...; exprn }

These e.pressions are e)al ated in seI ence, b t once the e)al ation of one is complete 3+hether it s cceeds or fails4, and the e)al ation of another begins, there is no possibility of bac0trac0ing into the preceding one( The last e.pression in a compo nd e.pression is not bo nded, ho+e)er( C.cept in s ch specific sit ations, e.pressions are not bo nded( $or e.ample, in
if expr1 then expr2 else expr3

neither expr2 nor expr3 is bo nded( *ince Icon control str ct res are e.pressions that may ret rn res lts, it is possible to +rite e.pressions s ch as
every write(if i > j then j to i else i to j)

+hich +rites the integers from i to 5 in ascending seI ence( )ata (acktracking. 9hile control bac0trac0ing is a f ndamental part of e.pression e)al ation in Icon, data bac0trac0ing is not performed e.cept in a fe+ specific operations( $or e.ample, in
(i := 3) & read(f)

the )al e 3 is assigned to i( C)en if read(f) fails, the former )al e of i is not restored( There are, ho+e)er, specific operations in +hich data bac0trac0ing is performed( $or e.ample, the reversible assignment operation
x <- y

assigns the )al e of y to ., b t it restores the former )al e of . if control bac0trac0ing into this e.pression occ rs( Th s,
(i <- 3) & read(f)

assigns 3 to i but restores the previous value of i if read(f) fails. 2.1.3 Csets and Strings Csets are unordered sets of characters, while strings are sequences of characters. There are 256 different characters, the first 128 of which are interpreted as ASCII. The number and interpretation of characters is independent of the architecture of the computer on which Icon is implemented. Csets. Csets are represented literally by surrounding their characters by single quotation marks. For example,
vowels := 'aeiouAEIOU'

assigns a cset of 10 characters to vowels. There are several built-in csets that are the values of keywords. These include &lcase, &ucase, and &cset, which contain the lowercase letters, the uppercase letters, and all 256 characters, respectively. Operations on csets include union, intersection, difference, and complement with respect to &cset. Csets are used in lexical analysis. For example, the function upto(c, s) is analogous to find(s1, s2), except that it generates the positions at which any character of c occurs in s. Thus,

18

upto(vowels, "They sit like bumps on a log.")

is capable of producing 3, 7, 11, 13, 16, 21, 24, and 27. Strings. Strings are represented literally by surrounding their characters with double quotation marks instead of single quotation marks. The empty string, which contains no characters, is given by "". The size of a string is given by *s. For example, if
command := "Sit still!"

then the )al e of *command is 10( The )al e of *"" is 0( *pace for strings is pro)ided a tomatically and there is no inherent limit to the si7e of a string( There are se)eral operations that constr ct strings( The principal one is concatenation, denoted by
s1 || s2

The f nction repl(s, i) prod ces the res lt of concatenating s i times( Th s,


write(repl("*!",3))

+rites *!*!*!( 6ther string constr ction f nctions incl de reverse(s), +hich prod ces a string +ith the characters of s in re)erse order, and trim(s, c), +hich prod ces a string in +hich trailing characters of s that occ r in c are omitted( There also are f nctions for positioning a string in a field of a fi.ed +idth( $or e.ample, the f nction left(s1, i, s2) prod ces a string of length i +ith s1 positioned at the left and padded +ith copies of s2 as needed( * bstrings are prod ced by s bscripting a string +ith the beginning and ending positions of the desired s bstring( Positions in strings are bet+een characters, and the position before the first character of a string is n mbered 1( $or e.ample,
verb := command[1:4]

assigns the string "Sit" to verb( * bstrings also can be specified by the beginning position and a length, as in
verb := command[1+:3]

If the length of a s bstring is 1, only the first position need be gi)en, so that the )al e of command[2] is "i"( 1ssignment can be made to a s bscripted string to prod ce a ne+ string( $or e.ample,
command[1:4] := "Remain"

changes the value of command to "Remain still!". String operations are applicative; no operation on a string in Icon changes the characters in it. The preceding example may appear to contradict this, but in fact
command[1:4] := "Remain"

is an abbre)iation for
command := "Remain" || command[4:11]

Th s, a ne+ string is constr cted and then assigned to command( #onpositi)e )al es can be sed to specify a position +ith respect to the right end of a string( $or e.ample, the )al e of command[-1] is "!"( The )al e 0 refers to the position after the last character of a string, so that if the )al e of command is "Sit still!",

19

command[5:0]

is eI i)alent to
command[5:11]

The s bscript positions can be gi)en in either order( Th s,


command[11:5]

prod ces the same res lt as


command[5:11]

*tring-analysis f nctions li0e find and pto ha)e optional third and fo rth arg ments that allo+ their range to be restricted to a partic lar portion of a string( $or e.ample,
upto(vowels, "They sit like bumps on a log.", 10, 20)

only produces positions of vowels between positions 10 and 20 of its second argument: 11, 13, and 16. If these arguments are omitted, they default to 1 and 0, so that the entire string is included in the analysis. Mapping. One of the more interesting string-valued functions in Icon is map(s1, s2, s3). This function produces a string obtained from a character substitution on s1. Each character of s1 that occurs in s2 is replaced by the corresponding character in s3. For example,
write(map("Remain still!", "aeiou", "*****"))

writes R*m**n st*ll!. Characters in s1 that do not appear in s2 are unchanged, as this example shows. If a character occurs more than once in s2, its right-most correspondence in s3 applies. Consequently,
s2 := &lcase || &ucase || "aeiou" s3 := repl("|",26) || repl("u",26) || "*****" write(map("Remain still!", s2, s3))

writes u*|**| ||*||!. 2.1.4 String Scanning String scanning is a high-level facility for string analysis that suppresses the computational details associated with the explicit location of positions and substring specifications. In string scanning, a subject serves as a focus of attention. A position in this subject is maintained automatically. A string-scanning expression has the form
expr1 ? expr2

in which the evaluation of expr1 provides the subject. The position in the subject is 1 initially. The expression expr2 is then evaluated in the context of this subject and position. Although expr2 can contain any operation, two matching functions are useful in analyzing the subject:
tab(i)    set the position in the subject to i
move(i)   increment the position in the subject by i

/oth of these f nctions ret rn the s bstring of the s b5ect bet+een the old and ne+ positions( If the position is o t of the range of the s b5ect, the matching f nction fails and the position is not changed( The position can be increased or decreased( #onpositi)e

20

)al es can be sed to refer to positions relati)e to the end of the s b5ect( Th s, tab304 mo)es the position to the end of the s b5ect, matching the remainder of the s b5ect( 1n e.ample of string scanning is
line ? while write(move(2))

+hich +rites s ccessi)e t+o-character s bstrings of line, stopping +hen there are not t+o characters remaining( In string scanning, the trailing arg ments of string analysis f nctions s ch as find and pto are omitted, the f nctions apply to the s b5ect at the c rrent position( Therefore, s ch f nctions can be sed to pro)ide arg ments for matching f nctions( 1n e.ample is
line ? write(tab(find("::=")))

which writes the initial portion of line up to an occurrence of the string "::=". If a matching function is resumed, it restores the position in the subject to the value that it had before the matching function was evaluated. For example, suppose that line contains the substring "::=". Then
line ? ((tab(find("::=") + 3)) & write(move(10)) | write(tab(0)))

writes the 10 characters after "::=", provided there are 10 more characters. However, if there are not 10 characters remaining, move(10) fails and tab(find("::=") + 3) is resumed. It restores the position to the beginning of the subject, and the alternative, tab(0), matches the entire subject, which is written. Data backtracking of the position in the subject is important, since it allows matches to be performed with the assurance that any previous alternatives that failed to match left the position where it was before they were evaluated. The subject and position are directly accessible as the values of the keywords &subject and &pos, respectively. For example,
&subject := "Hello"

assigns the string 2=ello2 to the s b5ect( 9hene)er a )al e is assigned to the s b5ect, Lpos is set to 1 a tomatically( The )al es of Ls b5ect and Lpos are sa)ed at the beginning of a string-scanning e.pression and are restored +hen it completes( ConseI ently, scanning e.pressions can be nested( 2.1.5 Lists 1 list is a linear aggregate of )al es 32elements24( $or e.ample,
cities := ["Portland", "Toledo", "Tampa"]

assigns a list of three strings to cities( &ists can be heterogeneo s, as in


language := ["Icon", 1978, "The University of Arizona"]

An empty list, containing no elements, is produced by []. The function


list(i, x)

produces a list of i elements, each of which has the value of x. The size operation *x also applies to lists. The value of *cities is 3, for example.

21

1n element of a list is referenced by a s bscripting e.pression that has the same form as the one for strings( $or e.ample,
cities[3] := "Miami"

changes the )al e of cities to


["Portland", "Toledo", "Miami"]

The f nction sort 3a4 prod ces a sorted copy of a( $or e.ample, sort3cities4 prod ces
["Miami", "Portland", "Toledo"]

&ist operations, nli0e string operations, are not applicati)e( 9hile assignment to a s bstring is an abbre)iation for concatenation, assignment to a s bscripted list changes the )al e of the s bscripted element( 1 list )al e is a pointer to a str ct re that contains the elements of the list( 1ssignment of a list )al e copies this pointer, b t it does not copy the str ct re( ConseI ently, in
states := ["Nevada", "Texas", "Maine", "Georgia"] slist := states

both states and slist point to the same str ct re( /eca se of this,
states[2] := "Arkansas"

changes the second element of slist as +ell as the second element of states( The elements of a list may be of any type, incl ding lists, as in
tree := ["a", ["b", ["c"], ["d"]]]

+hich can be depicted as

*tr ct res also can be sed to represent loops, as in


graph := ["a", ""] graph[2] := graph

+hich can be depicted as

Lists are not fixed in size. Elements can be added to them or removed from them at their ends by queue and stack functions. The function put(a, x) adds the value of x to the right end of the list a, increasing its size by one. Similarly, push(a, x) adds the value of x to the left end of a. For example,
lines := [] while put(lines, read(f))

22

constr cts a list of the lines from the file f( Con)ersely,


lines := []

while push(lines, read(f))
constructs a list of lines in reverse order. The functions pop(a) and get(a) are the same. They both remove an element from the left end of a and return it as the value of the function call, but they fail if a is empty. Consequently,
lines := [] while push(lines, read(f)) while write(pop(lines))

+rites o t the lines of f in re)erse order( The f nction p ll3a4 is similar, b t it remo)es an element from the right end of a( 6ther operations on lists incl de concatenation, +hich is denoted by
a1 ||| a2

where a1 and a2 are lists. There is no automatic conversion of other types to lists. List sectioning is denoted by
a[i:j]
The result is a new list containing values i through j of a. There is no inherent limit to the size of a list, either when it is originally created or as a result of adding elements to it. 2.1.6 Sets A set is an unordered collection of values. Unlike csets, which contain only characters, sets are collections of Icon values that can be of any type. A set is constructed from a list by set(a). For example,
states := set(["Virginia", "Rhode Island", "Kansas", "Illinois"])

assigns a set of fo r elements to states( The operation


member(s, x)

s cceeds if the )al e of . is a member of s b t fails other+ise( The operation


insert(s, x)

adds the )al e of . to s if it is not already a member of s, +hile delete(s, x) deletes the )al e of . from s( The operations of nion, intersection, and difference for sets also are pro)ided( &i0e other str ct res, sets can be heterogeneo s( 1 set can e)en be a member of itself, as in
insert(s, s)

There is no contradiction here, since a set )al e is a pointer to the str ct re for the set(

23

2.1.7 Tables A table is a set of pairs of values. Tables provide an associative lookup mechanism as contrasted with positional references to lists. They can be subscripted with an entry value to which a value can be assigned to make up a pair called a table element. A table is created by
table(x)

Tables are empty initially( The )al e of . is an assigned defa lt )al e that is prod ced if the table is s bscripted +ith an entry )al e to +hich no )al e has been assigned 3that is, for an element that is not in the table4( $or e.ample,
states := table(0)

assigns to states a table +ith a defa lt )al e of 0( 1n element can be added to states by an assignment s ch as
states["Oregon"] := 1

+hich adds a table element for 26regon2 +ith the )al e 1 to states( 6n the other hand,
write(states ["Utah"])

+rites 0, the defa lt )al e, if there is no element in the table for 2Utah2( Tables can be heterogeneo s and ha)e a mi.t re of types for entry and assigned )al es( Tables gro+ a tomatically in si7e as ne+ elements are added and there is no inherent limit on the si7e of a table( 2.1.8 Records 1 record is an aggregate of )al es that is referenced by named fields( Cach record type has a separate name( 1 record type and the names of its fields are gi)en in a declaration( $or e.ample,
record rational(numerator, denominator)

declares a record of type rational +ith t+o fields@ n merator and denominator( 1n instance of a record is created by calling a record-constr ctor f nction corresponding to the form of the declaration for the record type( Th s,
r := rational(3,5)

assigns to r a record of type rational with a numerator field of 3 and a denominator field of 5. Fields are referenced by name, as in
write(r.numerator)

which writes 3. Fields can also be referred to by position; r[1] is equivalent to r.numerator. There is no inherent limit to the number of different record types. The same field names can be given for different record types, and such fields need not be in the same position for all such record types. 2.1.9 Input and Output Input and output in Icon are sequential and comparatively simple. The standard input, standard output, and standard error output files are the values of &input, &output, and &errout, respectively. The function
open(s1,s2)

24

opens the file +hose name is s1 according to options gi)en by s2 and prod ces a )al e of type file( Typical options are 2r2 for opening for reading and 2+2 for opening for +riting( The defa lt is 2r2( $or e.ample,
log := open("grade.log", "w")

assigns a )al e of type file to log, corresponding to the data file grade(log, +hich is opened for +riting( The f nction open fails if the specified file cannot be opened according to the options gi)en( The f nction close3f4 closes the file f( The f nction read3f4 reads a line from the file f b t fails if an end of file is enco ntered( The defa lt is standard inp t if f is omitted( The res lt of
write(x1,x2, ..., xn)

depends on the types of .1, .2, (((, .n( *trings and types con)ertible to strings are +ritten, b t if one of the arg ments is a file, s bseI ent strings are +ritten to that file( The defa lt file is standard o tp t( Th s,
write(s1,s2)

+rites the concatenation of s1 and s2 to standard o tp t, b t


write(log,s)

+rites s to the file grade(log( In any e)ent, +rite ret rns the string )al e of the last arg ment +ritten( The f nction
stop(x1, x2, ..., xn)

prod ces the same o tp t as +rite, b t it then terminates program e.ec tion( 2.1.10 Procedures *roced"re )eclarations. The e.ec table portions of an Icon program are contained in proced re declarations( Program e.ec tion begins +ith a call of the proced re main( 1n e.ample of a proced re declaration is@
procedure maxstr(slist) local max, value max := 0 every value := *!slist do if value> max then max := value return max end

This procedure computes the length of the longest string in a list of strings. The formal parameter slist and the identifiers max and value are local to calls of the procedure maxstr. Storage for them is allocated when maxstr is called and deallocated when maxstr returns. A procedure call has the same form as a function call. For example,
lines := [] while put(lines, read(f)) write(maxstr(lines))

+rites the length of the longest line in the file f( 1 proced re call may fail to prod ce a res lt in the same +ay that a b ilt-in operation can fail( This is indicated by fail in the proced re body in place of ret rn( $or e.ample, the

25

follo+ing proced re ret rns the length of the longest string in slist b t fails if that length is less than limit@
procedure maxstr(slist, limit) local max, value max := 0 every value := *!slist do if value> max then max := value if max < limit then fail else return max end

Flowing off the end of a procedure body without an explicit return is equivalent to fail. A procedure declaration may have static identifiers that are known only to calls of that procedure but whose values are not destroyed when a call returns. A procedure declaration also may have an initial clause whose expression is evaluated only the first time the procedure is called. The use of a static identifier and an initial clause is illustrated by the following procedure, which returns the length of the longest of all the strings in the lists it has processed:
procedure maxstrall(slist) local value static max initial max := 0 every value := *!slist do if value> max then max := value return max end

*roced"res and #"nctions. Proced res and f nctions are sed in the same +ay( Their names ha)e global scope( 6ther identifiers can be declared to ha)e global scope, as in
global count

* ch global declarations are on a par +ith proced re declarations and cannot occ r +ithin proced re declarations( 1 call s ch as
write(maxstr(lines))

applies the value of the identifier ma.str to lines and applies the value of the identifier +rite to the res lt( There is nothing fi.ed abo t the )al es of s ch identifiers( In this case, the initial )al e of ma.str is a proced re, as a conseI ence of the proced re declaration for it( *imilarly, the initial )al e of +rite is a f nction( These )al es can be assigned to other )ariables, as in
print := write ... print(maxstr(lines))

in +hich the f nction that is the initial )al e of +rite is assigned to print( *imilarly, nothing pre)ents an assignment to an identifier +hose initial )al e is a proced re( ConseI ently,
write := 3

assigns an integer to +rite, replacing its initial f nction )al e( 1ltho gh it is typical to call a proced re by sing an identifier that has the proced re )al e, the proced re sed in a call can be comp ted( The general form of a call is

expr0(expr1, expr2, ..., exprn)
where the value of expr0 is applied to the arguments resulting from the evaluation of expr1, expr2, ..., exprn. For example,
(proclist[i])(expr1, expr2, ..., exprn)
applies the procedure that is the ith element of proclist. Procedures may be called recursively. The recursive nature of a call depends on the fact that procedure names are global. The "Fibonacci strings" provide an example:
procedure fibstr(i) if i = 1 then return "a" else if i = 2 then return "b" else return fibstr(i - 1) || fibstr(i - 2) end

1n identifier that is not declared in a proced re and is not global defa lts to local( Th s, local declarations can be omitted, as in
procedure maxstr(slist) max := 0 every value := * !slist do if value > max then max := value return max end

*roced"res as Generators( In addition to ret rning and failing, a proced re can also s spend( In this case, the )al es of its arg ments and local identifiers are not destroyed, and the call can be res med to prod ce another res lt in the same +ay a b ilt-in generator can be res med( 1n e.ample of s ch a generator is
procedure intseq(i) repeat { suspend i i +:= 1 } end

1 call intseI3104, for e.ample, is capable of generating the infinite seI ence of integers 10, 11, 12, ((( ( $or e.ample,
every f(intseq(10) \ 5)

calls f(10), f(11), f(12), f(13), and f(14). If the argument of suspend is a generator, the generator is resumed when the call is resumed and the call suspends again with the result it produces. A generator of the Fibonacci strings provides an example:
procedure fibstrseq()
   local s1, s2, s
   s1 := "a"
   s2 := "b"
   suspend (s1 | s2)
   repeat {
      suspend s := s1 || s2
      s1 := s2
      s2 := s
      }
end

27

When this procedure is called, the first suspend expression produces the value of s1, "a". If the call of fibstrseq is resumed, the argument of suspend is resumed and produces the value of s2, "b". If the call is resumed again, there is no further result for the first suspend, and evaluation continues to the repeat loop. Repeated alternation often is useful in supplying an endless number of alternatives. For example, the procedure intseq(i) can be rewritten as
procedure intseq(i) suspend i | (i +:= |1) end

Note that |1 is used to provide an endless sequence of increments. Argument Transmission. Omitted arguments in a procedure or function call (including trailing ones) default to the null value. Extra arguments are evaluated, but their values are discarded. Some functions, such as write, may be called with an arbitrary number of arguments. All arguments to procedures and functions are passed by value. If the evaluation of an argument expression fails, the procedure or function is not called. This applies to extra arguments. Arguments are not dereferenced until all of them have been evaluated. Dereferencing cannot fail. Since no argument is dereferenced until all argument expressions are evaluated, expressions with side effects can produce unexpected results. Thus, in
write(s, s := "hello")

the value written is hellohello, regardless of the value of s before the evaluation of the second argument of write. Dereferencing in Return Expressions. The result returned from a procedure call is dereferenced unless it is a global identifier, a static identifier, a subscripted structure, or a subscripted string-valued global identifier. In these exceptional cases, the variable is returned and assignment can be made to the procedure call. An example is
procedure maxel(a, i, j) if i > j then return a[i] else return a[j] end

Here a list element, depending on the values of i and j, is returned. An assignment can be made to it, as in
maxel(lines, i, j) := "end"

which assigns "end" to lines[i] or lines[j], depending on the values of i and j. Mutual Evaluation. In a call expression, the value of expr0 can be an integer i as well as a procedure. In this case, called mutual evaluation, the result of the ith argument is produced. For example,
i := 1(find(s1, line1), find(s2, line2))

assigns to i the position of s1 in line1, provided s1 occurs in line1 and that s2 occurs in line2. If either call of find fails, the expression fails and no assignment is made. The selection integer in mutual evaluation can be negative, in which case it is interpreted relative to the end of the argument list. Consequently,

27

(-1)(expr1,expr2, ..., exprn)

produces the result of exprn and is equivalent to


expr1 & expr2 & ... & exprn

The selection integer can be omitted, in which case it defaults to -1. 2.1.11 Co-Expressions The evaluation of an expression in Icon is limited to the site in the program where it appears. Its results can be produced only at that site as a result of iteration or goal-directed evaluation. For example, the results generated by intseq(i) described in Section 2.1.10 can only be produced where it is called, as in
every f(intseq(10) \ 5)

It is often useful, however, to be able to produce the results of a generator at various places in the program as the need for them arises. Co-expressions provide this facility by giving a context for the evaluation of an expression that is maintained in a data structure. Co-expressions can be activated to produce the results of a generator on demand, at any time and place in the program. A co-expression is constructed by
create expr

The expression expr is not evaluated at this time. Instead, an object is produced through which expr can be resumed at a later time. For example,
label := create ("L" || (1 to 100) || ":")

assigns to label a co-expression for the expression


"L" || (1 to 100) || ":"

The operation @label activates this co-expression, which corresponds to resuming its expression. For example,
write(@label)
write(" tstl count")
write(@label)

writes
L1:
 tstl count
L2:

If the resumption of the expression in a co-expression does not produce a result, the co-expression activation fails. For example, after @label has been evaluated 100 times, subsequent evaluations of @label fail. The number of results that a co-expression e has produced is given by *e. The general form of the activation expression is
expr1 @ expr2

which activates expr2 and transmits the result of expr1 to it. This form of activation can be used to return a result to the co-expression that activated the current one. A co-expression is a value like any other value in Icon and can be passed as an argument to a procedure, returned from a procedure, and so forth. A co-expression can survive the call of the procedure in which it is created.

28

If the argument of a create expression contains identifiers that are local to the procedure in which the create occurs, copies of these local identifiers are included in the co-expression with the values they have at the time the create expression is evaluated. These copied identifiers subsequently are independent of the local identifiers in the procedure. Consider, for example,
procedure labgen(tag) local i, j ... i := 10 j := 20 e := create (tag || (i to j) || ":") ... i := j if i > 15 then return e ... end

The expression
labels := labgen("X")

assigns to labels a co-expression that is equivalent to evaluating


create ("X" || (10 to 20) || ":")

The fact that i is changed after the co-expression was assigned to e, but before e returns, does not affect the co-expression, since it contains copies of i and j at the time it was created. Subsequent changes to the values of i or j do not affect the co-expression. A copy of a co-expression e is produced by the refresh operation, ^e. When a refreshed copy of a co-expression is made, its expression is reset to its initial state, and the values of any local identifiers in it are reset to the values they had when the co-expression was created. For example,
newlabels := ^labels

assigns to newlabels a co-expression that is capable of producing the same results as labels, regardless of whether or not labels has been activated. The value of the keyword &main is the co-expression for the call of main that initiates program execution. 2.1.12 Diagnostic Facilities String Images. The function type(x) only produces the string name of the type of x, but the function image(x) produces a string that shows the value of x. For strings and csets, the value is shown with surrounding quotation marks in the fashion of program literals. For example,
write(image("Hi there!"))

writes "Hi there!", while


write(image('aeiou'))

writes 'aeiou'. For structures, the type name and size are given. For example,
write(image([]))

writes list(0).

30

Various forms are used for other types of data, using type names where necessary so that different types of values are distinguishable. Tracing. If the value of the keyword &trace is nonzero, a trace message is produced whenever a procedure is called, returns, fails, suspends, or is resumed. Trace messages are written to standard error output. The value of &trace is decremented for every trace message. Tracing stops if the value of &trace becomes zero, which is its initial value. Suppose that the following program is contained in the file fibstr.icn:
procedure main() &trace := -1 fibstr(3) end procedure fibstr(i) if i = 1 then return "a" else if i = 2 then return "b" else return fibstr(i -1) || fibstr(i -2) end

The trace output of this program is


fibstr.icn: 3  | fibstr(3)
fibstr.icn: 9  | | fibstr(2)
fibstr.icn: 8  | | fibstr returned "b"
fibstr.icn: 9  | | fibstr(1)
fibstr.icn: 7  | | fibstr returned "a"
fibstr.icn: 9  | fibstr returned "ba"
fibstr.icn: 4  main failed

In addition to the indentation corresponding to the level of procedure call, the value of the keyword &level also is the current level of call. Displaying Identifier Values. The function display(i, f) writes a list of all identifiers and their values for i levels of procedure calls, starting at the current level. If i is omitted, the default is &level, while if f is omitted, the list is written to standard error output. The format of the listing produced by display is illustrated by the following program:
procedure main()
   log := open("grade.log", "w")
   while write(log, check(read()))
end

procedure check(value)
   static count
   initial count := 0
   if numeric(value) then {
      count +:= 1
      return value
      }
   else {
      display()
      stop("nonnumeric value")
      }
end

Suppose that the tenth line of input is the nonnumeric string "3.a". Then the output of display is

31

check local identifiers: value = "3.a" count = 9 main local identifiers: log = file(grade.log) global identifiers: main = procedure main check = procedure check open = function open write = function write read = function read numeric = function numeric display = function display stop = function stop

Error Messages. If an error is encountered during program execution, a message is written to standard error output and execution is terminated. For example, if the tenth line of a program contained in the file check.icn is
i +:= "x"

evaluation of this expression produces the error message


Run-time error 102 at line 10 in check.icn numeric expected offending value: "x"

2.2 Language Features and the Implementation


Even a cursory consideration of Icon reveals that some of its features present implementation problems and require approaches that are different from ones used in more conventional languages. In the case of a language of the size and complexity of Icon, it is important to place different aspects of the implementation in perspective and to identify specific problems. Values and Variables. The absence of type declarations in Icon has far-reaching implications. Since any variable may have a value of any type and the type may change from time to time during program execution, there must be a way of representing values uniformly. This is a significant challenge in a language with a wide variety of types ranging from integers to co-expressions. Heterogeneous structures follow as a natural consequence of the lack of type declarations. In one sense, the absence of type declarations simplifies the implementation: there is not much that can be done about types during program translation (compilation), and some of the work that is normally performed by conventional compilers can be avoided. The problems do not go away, however--they just move to another part of the implementation, since run-time type checking is required. Automatic type conversion according to context goes hand-in-hand with type checking. Storage Management. Since strings and structures are created during program execution, rather than being declared, the space for them must be allocated as needed at run time. This implies, in turn, some mechanism for reclaiming space that has been allocated but which is no longer needed--"garbage collection." These issues are complicated by the diversity of types and sizes of objects, the lack of any inherent size limitations, and the possibility of pointer loops in circular structures.

32

Strings. Independent of storage-management considerations, strings require special attention in the implementation. The emphasis of Icon is on string processing, and it is necessary to be able to process large amounts of string data efficiently. Strings may be very long and many operations produce substrings of other strings. The repertoire of string analysis and string synthesis functions is large. All this adds up to the need for a well-designed and coherent mechanism for handling strings. Structures. Icon's unusual structures, with sophisticated access mechanisms, also pose problems. In particular, structures that can change in size and can grow without limit require different implementation approaches than static structures of fixed size and organization. The flexibility of positional, stack, and queue access mechanisms for lists requires compromises to balance efficient access for different uses. Sets of values with arbitrary types, combined with a range of set operations, pose non-trivial implementation problems. Tables are similar to sets, but require additional attention because of the implicit way that elements are added. Procedures and Functions. Since procedures and functions are values, they must be represented as data objects. More significantly, the meaning of a function call cannot, in general, be determined when a program is translated. The expression write(s) may write a string or it may do something else, depending on whether or not write still has its initial value. Such meanings must, instead, be determined at run time. Polymorphic Operations. 
Although the meanings of operations cannot be changed during program execution in the way that the meanings of calls can, several operations perform different computations depending on the types of their operands. Thus, x[i] may subscript a string, a list, or a table. The meanings of some operations also depend on whether they occur in an assignment or a dereferencing context. For example, if s has a string value, assignment to s[i] is an abbreviation for a concatenation followed by an assignment to s, while if s[i] occurs in a context where its value is needed, it is simply a substring operation. Moreover, the context cannot, in general, be determined at translation time. The way subscripting operations are specified in Icon offers considerable convenience to the programmer at the expense of considerable problems for the implementer. Expression Evaluation. Generators and goal-directed evaluation present obvious implementation problems. There is a large body of knowledge about the implementation of expression evaluation for conventional languages in which expressions always produce a single result, but there is comparatively little knowledge about implementing expressions that produce results in sequence. While there are languages in which expressions can produce more than one result, this capability is limited to specific contexts, such as pattern matching, or to specific control structures or data objects. In Icon, generators and goal-directed evaluation are general and pervasive and apply to all evaluation contexts and to all types of data. Consequently, their implementation requires a fresh approach. The mechanism also has to handle the use of failure to drive control structures and must support novel control structures, such as alternation and limitation. Efficiency is a serious concern, since whatever mechanism is used to implement generators is also used in conventional computational situations in which only one result is needed.

33

String Scanning. String scanning is comparatively simple. The subject and position--"state variables"--have to be saved at the beginning of string scanning and restored when it is completed. Actual string analysis and matching follow trivially from generators and goal-directed evaluation. Co-Expressions. Co-expressions, which are only relevant because of the expression-evaluation mechanism of Icon, introduce a whole new set of complexities. Without co-expressions, the results that a generator can produce are limited to its site in the program. Control backtracking is limited syntactically, and its scope can be determined during program translation. With co-expressions, a generator in a state of suspension can be activated at any place and time during program execution. RETROSPECTIVE: Icon has a number of unusual features that are designed to facilitate programming, and it has an extensive repertoire of string and structure operations. One of Icon's notable characteristics is the freedom from translation-time constraints and the ability to specify and change the meanings of operations at run time. This run-time flexibility is valuable to the programmer, but it places substantial burdens on the implementation--and also makes it interesting. At the top level, there is the question of how actually to carry out some of the more sophisticated operations. Then there are questions of efficiency, both in execution speed and storage utilization. There are endless possibilities for alternative approaches and refinements. It is worth noting that many aspects of the implementation are relatively independent of each other and can be approached separately. Operations on strings and structures are largely disjoint and can, except for general considerations of the representation of values and storage management, be treated as independent problems. The independence of expression evaluation from other implementation considerations is even clearer. Without generators and goal-directed 
evaluation, Icon would be a fairly conventional high-level string and structure processing language, albeit one with interesting implementation problems. On the other hand, generators and goal-directed evaluation are not dependent in any significant way on string and structure data types. Generators, goal-directed evaluation, and related control structures could just as well be incorporated in a programming language emphasizing numerical computation. The implementation problems related to expression evaluation in the two contexts are largely the same. While untyped variables and automatic storage management have pervasive effects on the overall implementation of Icon, there are several aspects of Icon that are separable from the rest of the language and its implementation. Any specific data structure, string scanning, or co-expressions could be eliminated from the language without significantly affecting the rest of the implementation. Similarly, new data structures and new access mechanisms could be added without requiring significant modifications to the balance of the implementation. EXERCISES 2.1 What is the outcome of the following expression if the file f contains a line consisting of "end", or if it does not?
while line := read(f) do
   if line == "end" then break
   else write(line)

34

2.2 What does


write("hello" | "howdy")

write? 2.3 What is the result of evaluating the following expression: 1(1 to 3) > 10 2.4 Explain the rationale for dereferencing of variables when a procedure call returns. 2.5 Give an example of a situation in which it cannot be determined until run time whether a string subscripting expression is used in an assignment or a dereferencing context.

35

Chapter 3: Organization of the Implementation


PERSPECTIVE: Many factors influence the implementation of a programming language. The properties of the language itself, of course, are of paramount importance. Beyond this, goals, resources, and many other factors may affect the nature of an implementation in significant and subtle ways. In the case of the implementation of Icon described here, several unusual factors deserve mention. To begin with, Icon's origins were in a research project, and its implementation was designed not only to make the language available for use but also to support further language development. The language itself was less well defined and more subject to modification than is usually the case with an implementation. Therefore, flexibility and ease of modification were important implementation goals. Although the implementation was not a commercial enterprise, neither was it a toy or a system intended only for a few "friendly users." It was designed to be complete, robust, easy to maintain, and sufficiently efficient to be useful for real applications in its problem domain. Experience with earlier implementations of SNOBOL4, SL5, and the Ratfor implementation of Icon also influenced the implementation that is described here. They provided a repertoire of proven techniques and a philosophy of approach to the implementation of a programming language that has novel features. The computing environment also played a major role. The implementation started on a PDP-11/70 running under UNIX. The UNIX environment (Ritchie and Thompson 1978), with its extensive range of tools for program development, influenced several aspects of the implementation in a direct way. C (Kernighan and Ritchie 1978) is the natural language for writing such an implementation under UNIX, and its use for the majority of Icon had pervasive effects, which are described throughout this book. Tools, such as the Yacc parser-generator (Johnson 1975), influenced the approach to the translation portion of the implementation. Since the 
initial work was done on a PDP-11/70, with a user address space of only 128K bytes (combined instruction and data spaces), the size of the implementation was a significant concern. In particular, while the Ratfor implementation of Icon fit comfortably on computers with large address spaces, such as the DEC-10, CDC Cyber, and IBM 370, this implementation was much too large to fit on a PDP-11/70.

3.1 The Icon Virtual Machine


The implementation of Icon is organized around a virtual machine (Newey, Poole, and Waite 1972, Griswold 1977). Virtual machines, sometimes called abstract machines, serve as software design tools for implementations in which the operations of a language do not fit a particular computer architecture or where portability is a consideration and the attributes of several real computer architectures can be abstracted in a single common model. The expectation for most virtual machine models is that a translation will be performed to map the virtual machine operations onto a specific real machine. A virtual machine also provides a basis for developing an operational definition of a programming language in which details can be worked out in concrete terms.

36

During the design and development phases of an implementation, a virtual machine serves as an idealized model that is free of the details and idiosyncrasies of any real machine. The virtual machine can be designed in such a way that treatment of specific, machine-dependent details can be deferred until it is necessary to translate the implementation of the virtual machine to a real one. Icon's virtual machine only goes so far. Unlike the SNOBOL4 virtual machine (Griswold 1972), it is incomplete and characterizes only the expression-evaluation mechanism of Icon and computations on Icon data. It does not, per se, include a model for the organization of memory. There are many aspects of the Icon run-time system, such as type checking, storage allocation and garbage collection, that are not represented in the virtual machine. Instead Icon's virtual machine serves more as a guide and a tool for organizing the implementation than it does as a rigid structure that dominates the implementation.

3.2 Components of the Implementation


There are three major components of the virtual machine implementation of Icon: a translator, a linker, and a run-time system. The translator plays the role of a compiler for the Icon virtual machine. It analyzes source programs and converts them to virtual machine instructions. The output of the translator is called ucode. Ucode is represented as ASCII, which is helpful in debugging the implementation. The linker combines one or more ucode files into a single program for the virtual machine. This allows programs to be written and translated in a number of modules, and it is particularly useful for giving users access to pretranslated libraries of Icon procedures. The output of the linker, called icode, is in binary format for compactness and ease of processing by the virtual machine. Ucode and icode instructions are essentially the same, differing mainly in their format. Translating and linking are done in two phases:

These phases can be performed separately. If only the first phase is performed, the result is ucode, which can be saved and linked at another time. The run-time system consists of an interpreter for icode and a library of support routines to carry out the various operations that may occur when an Icon program is executed. The interpreter serves, conceptually, as a software realization of the Icon virtual machine. It decodes icode instructions and their operands and carries out the corresponding operations. It is worth noting that the organization of the Icon system does not depend in any essential way on the use of an interpreter. In fact, in the early versions of this implementation, the linker produced assembly-language code for the target machine. That code then was assembled and loaded with the run-time library. On the surface, the generation of machine code for a specific target machine rather than for a virtual machine corresponds to the conventional compilation approach. However, this is somewhat of an illusion, since the machine code consists largely of calls to run-time library routines corresponding to virtual machine instructions. Execution of machine code in such an implementation therefore

37

differs only slightly from interpretation, in which instruction decoding is done in software rather than in hardware. The difference in speed in the case of Icon is relatively minor. An interpreter offers a number of advantages over the generation of machine code that offset the small loss of efficiency. The main advantage is that the interpreter gets into execution very quickly, since it does not require a loading phase to resolve assembly-language references to library routines. Icode files also are much smaller than the executable binary files produced by a loader, since the run-time library does not need to be included in them. Instead, only one sharable copy of the run-time system needs to be resident in memory when Icon is executing.

3.3 The Translator


The translator that produces ucode is relatively conventional. It is written entirely in C and is independent of the architecture of the target machine on which Icon runs. Ucode is portable from one target machine to another. The translator consists of a lexical analyzer, a parser, a code generator, and a few support routines. The lexical analyzer converts a source-language program into a stream of tokens that are provided to the parser as they are needed. The parser generates abstract syntax trees on a per-procedure basis. These abstract syntax trees are in turn processed by the code generator to produce ucode. The parser is generated automatically by Yacc from a grammatical specification. Since the translator is relatively conventional and the techniques that it uses are described in detail elsewhere (Aho, Lam, Sethi, and Ullman 2006), it is not discussed here. There is one aspect of lexical analysis that deserves mention. The body of an Icon procedure consists of a series of expressions that are separated by semicolons. However, these semicolons usually do not need to be provided explicitly, as illustrated by examples in Chapter 2. Instead, the lexical analyzer performs semicolon insertion. If a line of a program ends with a token that is legal for ending an expression, and if the next line begins with a token that is legal for beginning an expression, the lexical analyzer generates a semicolon token between the lines. For example, the two lines
i := j + 3
write(i)

are equivalent to
i := j + 3; write(i)

since an integer literal is legal at the end of an expression and an identifier is legal at the beginning of an expression. If an expression spans two lines, the place to divide it is at a token that is not legal at the end of a line. For example,
s1 := s2 ||
s3

is equivalent to
s1 := s2 || s3

No semicolon is inserted, since || is not legal at the end of an expression.

3#

3.4 The Linker


The linker reads ucode files and writes icode files. An icode file consists of an executable header that loads the run-time system, descriptive information about the file, operation codes and operands, and data specific to the program. The linker, like the translator, is written entirely in C. While conversion of ucode to icode is largely a matter of reformatting, the linker performs two other functions.

3.4.1 Scope Resolution

The scope of an undeclared identifier in a procedure depends on global declarations (explicit or implicit) in the program in which the procedure occurs. Since the translator in general operates on only one module of a program, it cannot resolve the scope of undeclared identifiers, because not all global scope information is contained in any one module. The linker, on the other hand, processes all the modules of a program, and hence it has the task of resolving the scope of undeclared identifiers. An identifier may be global for several reasons:

- As the result of an explicit global declaration.
- As the name in a record declaration.
- As the name in a procedure declaration.
- As the name of a built-in function.

If an identifier with no local declaration falls into one of these categories, it is global. Otherwise it is local.

3.4.2 Construction of Run-Time Structures

A number of aspects of a source-language Icon program are represented at run time by various data structures. These structures are described in detail in subsequent chapters. They include procedure blocks, strings, and blocks for cset and real literals that appear in the program. This data is represented in ucode in a machine-independent fashion. The linker converts this information into binary images that are dependent on the architecture of the target computer.

3.5 The Run-Time System


Most of the interesting aspects of the implementation of Icon reside in its run-time system. This run-time system is written mostly in C, although there are a few lines of assembly-language code for checking for arithmetic overflow and for co-expressions. The C portion is mostly machine-independent and portable, although some machine-specific code is needed for some idiosyncratic computer architectures and to interface some operating-system environments. There are two main reasons for concentrating the implementation in the run-time system:

Some features of Icon do not lend themselves to translation directly into executable code for the target machine, since there is no direct image for them in the target-machine architecture. The target machine code necessary to carry out these operations therefore is too large to place in line; instead, it is placed in library routines that are called from in-line code. Such features range from operations on structures to string scanning.

4&

Operations that cannot be determined at translation time must be done at run time. Such operations range from type checking to storage allocation and garbage collection.

The run-time system is logically divided into four main parts: initialization and termination routines, the interpreter, library routines called by the interpreter, and support routines called by library routines.

Initialization and Termination Routines. The initialization routine sets up regions in which objects created at run time are allocated. It also initializes some structures that are used during program execution. Once these tasks are completed, control is transferred to the Icon interpreter. When a program terminates, either normally or because of an error, termination routines flush output buffers and return control to the operating system.

The Interpreter. The interpreter analyzes icode instructions and their operands and performs corresponding operations. The interpreter is relatively simple, since most complex operations are performed by library routines. The interpreter itself is described in Chapter 8.

Library Routines. Library routines are divided into three categories, depending on the way they are called by the interpreter: routines for Icon operators, routines for Icon built-in functions, and routines for complicated virtual machine instructions. The meanings of operators are known to the translator and linker, and hence they can be called directly. On the other hand, the meanings of functions cannot be determined until they are executed, and hence they are called indirectly.

Support Routines. Support routines include storage allocation and garbage collection, as well as type checking and conversion. Such routines typically are called by library routines, although some are called by other support routines.

RETROSPECTIVE: Superficially, the implementation of Icon appears to be conventional. An Icon program is translated and linked to produce an executable binary file. The translator and linker are conventional, except that they generate code and data structures for a virtual machine instead of for a specific computer. The run-time system dominates the implementation and plays a much larger role than is played by run-time systems in conventional implementations. This run-time system is the focus of the remainder of this book.

EXERCISES

3.1 Explain why there is only a comparatively small difference in execution times between a version of Icon that generates assembly-language code and one that generates virtual machine code that is interpreted.

3.2 List all the tokens in the Icon grammar that are legal as the beginning of an expression and as the end of an expression. Are there any tokens that are legal as both? As neither?

3.3 Is a semicolon inserted by the lexical analyzer between the following two program lines? s1 := s2 || s3

41

3.4 Is it possible for semicolon insertion to introduce syntactic errors into a program that would be syntactically correct without semicolon insertion?

3.5 What would be the advantages and disadvantages of merging the Icon translator and linker into a single program?

42

Chapter 4: Values and Variables


PERSPECTIVE: No feature of the Icon programming language has a greater impact on the implementation than untyped variables -- variables that have no specific type associated with them. This feature originated in Icon's predecessors as a result of a desire for simplicity and flexibility. The absence of type declarations reduces the amount that a programmer has to learn and remember. It also makes programs shorter and (perhaps) easier to write. The flexibility comes mainly from the support for heterogeneous aggregates. A list, for example, can contain a mixture of strings, integers, records, and other lists. There are numerous examples of Icon programs in which this flexibility leads to programming styles that are concise and simple. Similarly, "generic" procedures, whose arguments can be of any type, often are useful, especially for modeling experimental language features. While these facilities can be provided in other ways, such as by C's union construct, Icon provides them by the absence of features, which fits with the philosophy of making it easy to write good programs rather than hard to write bad ones.

The other side of the coin is that the lack of type declarations for variables makes it impossible for the translator to detect most type errors and defers type checking until the program is executed. Thus, a check that can be done only once at translation time in a language with a strong compile-time type system must be done repeatedly during program execution in Icon. Furthermore, just as the Icon translator cannot detect most type errors, a person who is writing or reading an Icon program does not have type declarations to help clarify the intent of the program.

Icon also converts arguments to the expected type where possible. This feature is, nevertheless, separable from type checking; Icon could have the latter without the former. However, type checking and conversion are naturally intertwined in the implementation.

As far as the implementation is concerned, untyped variables simplify the translator and complicate the run-time system. There is little the translator can do about types. Many operations are polymorphic, taking arguments of different types and sometimes performing significantly different computations, depending on those types. Many types are convertible to others. Since procedures are data values and may change meaning during program execution, there is nothing the translator can know about them. For this reason, the translator does not attempt any type checking or generate any code for type checking or conversion. All such code resides in the run-time routines for the functions and operations themselves.

There is a more subtle way in which untyped variables influence the implementation. Since any variable can have any type of value at any time, and can have different types of values at different times, all values must be the same size. Furthermore, Icon's rich repertoire of data types includes values of arbitrary size -- lists, tables, procedures, and so on. The solution to this problem is the concept of a descriptor, which either contains the data for the value, if it is small enough, or else contains a pointer to the data if it is too large to fit into a descriptor. The trick, then, is to design descriptors for all of Icon's data types, balancing considerations of size, ease of type testing, and efficiency of accessing the actual data.

43

4.1 Descriptors
Since every Icon value is represented by a descriptor, it is important that descriptors be as small as possible. On the other hand, a descriptor must contain enough information to determine the type of the value that it represents and to locate the actual data. Although values of some types cannot possibly fit into any fixed-size space, it is desirable for frequently used, fixed-sized values, such as integers, to be stored in their descriptors. This allows values of these types to be accessed directly and avoids the need to provide storage elsewhere for such values. If Icon were designed to run on only one kind of computer, the size and layout of the descriptor could be tailored to the architecture of the computer. Since the implementation is designed to run on a wide range of computer architectures, Icon takes an approach similar to that of C. Its descriptor is composed of "words," which are closely related to the concept of a word on the computer on which Icon is implemented. One word is not large enough for a descriptor that must contain both type information and an integer or a pointer. Therefore, a descriptor consists of two words, which are designated as the d-word and the v-word, indicating that the former contains descriptive information, while the latter contains the value:

The dotted line between the two words of a descriptor is provided for readability. A descriptor is merely two words, and the fact that these two words constitute a descriptor is a matter of context. The v-word of a descriptor may contain either a value, such as an integer, or a pointer to other data. In C terms, the v-word may contain a variety of types, including both ints and pointers. On many computers, C ints and C pointers are the same size. For some computers, however, C compilers have a large-memory-model option in which integers are 16 bits long, allowing efficient arithmetic, while pointers are 32 bits long, allowing access to a large amount of memory. In this situation, C longs are the same size as C pointers. There are other models, as well as computers with other word sizes, but the main considerations in the implementation of Icon are the accommodation of computers with 16- and 32-bit words and the large-memory model, in which pointers are larger than integers. In the large-memory model, a v-word must accommodate the largest of the types.

The d-words of descriptors contain a type code (a small integer) in their least significant bits and flags in their most significant bits. There are twelve type codes that correspond to source-language data types:

data type        type code
null             null
integer          integer or long
real number      real
cset             cset
file             file
procedure        proc
list             list
set              set
table            table

44

record           record
co-expression    coexpr

Other type codes exist for internal objects, which are on a par with source-language objects, from an implementation viewpoint, but which are not visible at the source-language level. The actual values of these codes are not important, and they are indicated in diagrams by their type code names.

4.1.1 Strings

There is no type code for strings. They have a special representation in which the d-word contains the length of the string (the number of characters in it) and the v-word points to the first character in the string:

String descriptors are called qualifiers. In order to make qualifiers more intelligible in the diagrams that follow, a pointer to a string is followed by the string in quotation marks rather than by an address. For example, the qualifier for "hello" is depicted as

In order to distinguish qualifiers from other descriptors with type codes that might be the same as a string length, all descriptors that are not qualifiers have an n flag in the most significant bit of the d-word. The d-words of qualifiers do not have this n flag, and string lengths are restricted to prevent their overflow into this flag position in situations where words are only 16 bits long.

4.1.2 The Null Value

A descriptor for the null value has the form

As explained previously, the n flag occurs in this and all other descriptors that are not qualifiers so that strings can be easily and unambiguously distinguished from all other kinds of values. The value in the v-word could be any constant value, but zero is useful and easily identified---and suggests "null."

4.1.3 Integers

Icon supports 32-bit integers, regardless of the computer on which it is implemented. Such integers therefore are either C ints or longs, depending on the computer architecture. On computers with 32-bit ints, the value of an Icon integer is stored in the v-word of its descriptor. For example, the integer 13570 is represented by

45

Note that the n flag distinguishes this descriptor from a string whose first character might be at the address 13570 and whose length might have the same value as the type code for integer. On computers with 16-bit ints, an Icon integer that fits in 16 bits also is stored in the v-word of a descriptor. An integer that is too large to fit into a word is stored in a block that is pointed to by the v-word, as illustrated in the next section. The two representations of integers are distinguished by different internal type codes: integer for integers that are contained in the v-words of their descriptors and long for integers that are contained in blocks pointed to by the v-words of their descriptors. Thus, there are two internal types for one source-language data type.

The p flag in the descriptor indicates that the v-word contains a pointer to a block. Blocks of some other types, such as record blocks, vary in size from value to value, but any one block is fixed in size and never grows or shrinks. If the type code in the title does not determine the size of the block, the second word in the block contains its size in bytes. In the diagrams that follow, the sizes of blocks are given for computers with 32-bit words. The diagrams would be slightly different for computers with 16-bit words. Records, which differ in size depending on how many fields they have, are examples of blocks that contain their sizes. For example, given the record declaration
record complex(r, i)

and
point := complex(1, 3)

the value of point is

The record-constructor block contains information that is needed to resolve field references.

On the other hand, with the declaration


record term(value, code, count)

and
word := term("chair", "noun", 4)

the value of word is:

As illustrated by these examples, blocks may contain descriptors as well as non-descriptor data. Non-descriptor data comes first in the block, followed by any descriptors, as illustrated by the preceding figure. The location of the first descriptor in a block is constant for all blocks of a given type, which facilitates garbage collection. Blocks for the remaining types are described in subsequent chapters.

4.3 Variables
Variables are represented by descriptors, just as values are. This representation allows values and variables to be treated uniformly in terms of storage and access. Variables for identifiers point to descriptors for the corresponding values. Variables always point to descriptors for values, never to other variables. For example, if
s := "hello"

then a variable for s has the form

The v flag distinguishes descriptors for variables from descriptors for values. The values of local identifiers are kept on a stack, while the values of global and static identifiers are located at fixed places in memory. Variables that point to the values of identifiers are created by icode instructions that correspond to the use of the identifiers in the program. Some variables, such as record field references, are computed. A variable that references a value in a data structure points directly to the descriptor for the value. The least-significant bits of the d-word for such a variable contain the offset, in words, of the value descriptor

4!

from the top of the block in which the value is contained. This offset is used by the garbage collector. The use of words, rather than bytes, allows larger offsets, which is important for computers with 16-bit words. For example, the variable word.count for the record given in the preceding section is

The variable points directly to the value rather than to the title of the block so that access to the value is more efficient. Note that the variable word.count cannot be determined at translation time, since the type of word is not known then and different record types could have count fields in different positions.
4.3.1 Operations on Variables

There are two fundamentally different contexts in which a variable can be used: dereferencing and assignment.

Suppose, as shown previously, that the value of the identifier s is the string "hello". Then a variable descriptor that points to the value of s and the corresponding value descriptor for "hello" have the following relationship:

In an expression such as write(s), s is dereferenced by fetching the descriptor pointed to by the v-word of the variable. In the case of assignment, as in
s := 13570

the value descriptor pointed to by the v-word of the variable descriptor is changed:

These operations on variables correspond to indirect load and store instructions of a typical computer.

4"

4.3.2 Trapped Variables

Icon has several variables with special properties that complicate assignment and dereferencing. Consider, for example, the keyword &trace. Its value must always be an integer. Consequently, in an assignment such as
&trace := expr

the value produced by expr must be checked to be sure that it is an integer. If it is not, an attempt is made to convert it to an integer, so that in
&trace := "1"

the value assigned to &trace is the integer 1, not the string "1". There are four keyword variables that require special processing for assignment: &trace, &random, &subject, and &pos. The keyword &random is treated in essentially the same way that &trace is. Assignment to &subject requires a string value and has the side effect of assigning the value 1 to &pos. Assignment to &pos is even more complicated: not only must the value assigned be an integer, but if it is not positive, it must also be converted to the positive equivalent with respect to the length of &subject. In any event, if the value in the assignment to &pos is not in the range of &subject, the assignment fails. Dereferencing these keywords, on the other hand, requires no special processing.

A naive way to handle assignment to these keywords is to check every variable during assignment to see whether it is one of the four that requires special processing. This would place a significant computational burden on every assignment. Instead, Icon divides variables into two classes: ordinary and trapped. Ordinary variables point to their values as illustrated previously and require no special processing. Trapped variables, so called because their processing is "trapped," are distinguished from ordinary variables by a t flag. Thus, assignment only has to check a single flag to separate the majority of variables from those that require special processing.

A trapped-variable descriptor for a keyword points to a block that contains the value of the keyword, its string name, and a pointer to a C function that is called when assignment to the keyword is made. For example, the trapped variable for &trace is:

It is worth noting that the more conventional approach to handling the problem of assignment to keywords is to compile special code if a keyword occurs in an assignment context. It is not always possible, however, to determine the context in which a variable is used in Icon. Consider a procedure of the form
# Returns the keyword &trace itself; the surrounding text relies on the
# result being a variable, so that a call to diagnose can be assigned to.
procedure diagnose(s)
   return &trace
end

The semantics of Icon dictate that the result returned in this case should be a variable, not just its value, so that it is possible to write an expression such as
diagnose(s) := 10

4#

which has the effect of assigning the value 10 to &trace. The translator has no way of knowing that an assignment to the call diagnose(s) is equivalent to an assignment to &trace. In fact, the translator cannot even determine that the value of diagnose will be a function when the previous assignment is performed, much less that it will be the procedure given earlier. Thus, the trapped-variable mechanism provides a way to handle uniformly all the situations in which such a keyword can be used.

4.4 Descriptors and Blocks in C


Descriptors and blocks of data are described and depicted abstractly in the previous sections of this chapter. In order to understand the implementation of some aspects of Icon, it is helpful to examine the C code that actually defines and manipulates data. The following sections illustrate typical C declarations for the structures used in the implementation of Icon. Some of the terminology and operations that appear frequently in the C code are included as well. Other operations are introduced in subsequent chapters, as they are needed.

4.4.1 Descriptors

As mentioned in Sec. 4.1, for C compilers in which ints and pointers are the same size, the size of a word is the size of an int, while if pointers are larger than ints, the size of a word is the size of a long. The difference between these two models of memory is handled by typedefs under the control of conditional compilation. Two constants that characterize the sizes are defined: IntSize and PtrSize. If these sizes are different, the constant MixedSizes is defined:
/*
 * MixedSizes is defined only when IntSize and PtrSize differ, i.e. when
 * C ints and C pointers are not the same size.
 */
#if IntSize != PtrSize
#define MixedSizes
#endif

This constant is used to select appropriate definitions for signed and unsigned words:
/*
 * word / uword: the signed and unsigned "word" types from which
 * descriptors are built.  When MixedSizes is defined (ints and pointers
 * differ in size) a word is a long; otherwise a word is an int.
 */
#ifdef MixedSizes
typedef long word;
typedef unsigned long uword;
#else
typedef int word;
typedef unsigned int uword;
#endif

A descriptor is declared as a structure:


/*
 * A descriptor: two words, the d-word followed by the v-word.
 * The v-word is a union because it may hold an integer, a string
 * pointer, a block pointer, or (for a variable) a descriptor pointer.
 */
struct descrip {                 /* descriptor */
   word dword;                   /*   type field */
   union {
      word integr;               /*   integer value */
      char *sptr;                /*   pointer to character string */
      union block *bptr;         /*   pointer to a block */
      struct descrip *dptr;      /*   pointer to a descriptor */
      } vword;
   };

The v-word of a descriptor is a union that reflects its various uses: an integer, a pointer to a string, a pointer to a block, or a pointer to another descriptor (in the case of a variable).

5&

4.4.2 Blocks

Each block type has a structure declaration. For example, the declaration for record blocks is
/*
 * Record block.  The fields array is declared with one element only to
 * satisfy the C compiler; actual record blocks are built at run time
 * with as many field descriptors as the record type declares.
 */
struct b_record {                /* record block */
   word title;                   /*   T_Record */
   word blksize;                 /*   size of block */
   struct descrip recdesc;       /*   record constructor descriptor */
   struct descrip fields[1];     /*   fields */
   };

Blocks for records vary in size, depending on the number of fields declared for the record type. The size of 1 in
struct descrip fields[1];

is provided to satisfy the C compiler. Actual blocks for records are constructed at run time in a region that is managed by Icon's storage allocator. Such blocks conform to the previous declaration, but the number of fields varies. The declaration provides a means of accessing portions of such blocks from C. The declaration for keyword trapped-variable blocks is
/*
 * Keyword trapped-variable block.  It has no size field: all such
 * blocks are the same size, so the size follows from the type code.
 */
struct b_tvkywd {                /* keyword trapped variable block */
   word title;                   /*   T_Tvkywd */
   int (*putval) ();             /*   assignment function */
   struct descrip kyval;         /*   keyword value */
   struct descrip kyname;        /*   keyword name */
   };

Note that the title fields of b_record and b_tvkywd contain type codes, as indicated in previous diagrams. The second field of b_record is a size as mentioned previously, but b_tvkywd has no size field, since all keyword trapped-variable blocks are the same size, which therefore can be determined from their type. The block union given in the declaration of descrip consists of a union of all block types:
union block { struct b_int longint; struct b_real realblk; struct b_cset cset; struct b_file file; struct b_proc proc; struct b_list list; struct b_lelem lelem; struct b_table table; struct b_telem telem; struct b_set set; struct b,selem selemstruct b,record recordstruct b,tvk+)d tvk+)dstruct b,tvsubs tvsubsstruct b,tvtbl tvtblstruct b,coexpr coexprstruct b,refres* refres*%-

Note that there are several kinds of blocks in addition to those that correspond to source-language data types.

51

4.4.3 Defined Constants

The type codes are defined symbolically:


/*
 * Type codes.  These occupy the least significant bits of a d-word.
 */
#define T_Null       0
#define T_Integer    1
#define T_Long       2
#define T_Real       3
#define T_Cset       4
#define T_File       5
#define T_Proc       6
#define T_List       7
#define T_Table      8
#define T_Record     9
#define T_Telem     10
#define T_Lelem     11
#define T_Tvsubs    12
#define T_Tvkywd    13
#define T_Tvtbl     14
#define T_Set       15
#define T_Selem     16
#define T_Refresh   17
#define T_Coexpr    18

The type codes in diagrams are abbreviated, as indicated by previous examples. The defined constants for d-word flags are
n    F_Nqual
p    F_Ptr
v    F_Var
t    F_Tvar

The values of these flags depend on the word size of the computer. The d-words of descriptors are defined in terms of flags and type codes:
/*
 * d-words: each combines a type code (T_*) with the flags (F_*) that
 * apply to descriptors of that type.
 */
#define D_Null      (T_Null | F_Nqual)
#define D_Integer   (T_Integer | F_Nqual)
#define D_Long      (T_Long | F_Ptr | F_Nqual)
#define D_Real      (T_Real | F_Ptr | F_Nqual)
#define D_Cset      (T_Cset | F_Ptr | F_Nqual)
#define D_File      (T_File | F_Ptr | F_Nqual)
#define D_Proc      (T_Proc | F_Ptr | F_Nqual)
#define D_List      (T_List | F_Ptr | F_Nqual)
#define D_Table     (T_Table | F_Ptr | F_Nqual)
#define D_Set       (T_Set | F_Ptr | F_Nqual)
#define D_Selem     (T_Selem | F_Ptr | F_Nqual)
#define D_Record    (T_Record | F_Ptr | F_Nqual)
#define D_Telem     (T_Telem | F_Ptr | F_Nqual)
#define D_Lelem     (T_Lelem | F_Ptr | F_Nqual)
#define D_Tvsubs    (T_Tvsubs | D_Tvar)
#define D_Tvtbl     (T_Tvtbl | D_Tvar)
#define D_Tvkywd    (T_Tvkywd | D_Tvar)
#define D_Coexpr    (T_Coexpr | F_Ptr | F_Nqual)
#define D_Refresh   (T_Refresh | F_Ptr | F_Nqual)
#define D_Var       (F_Var | F_Nqual | F_Ptr)
#define D_Tvar      (D_Var | F_Tvar)

As indicated previously, flags, type codes, and d-words are distinguished by the prefixes F_, T_, and D_, respectively.

52

4.4.4 C Coding Conventions

A number of conventions are used in the C routines for the run-time system to reduce detail and to focus on the way that Icon data is organized. Some of these are illustrated by the C function for the Icon operator *x, which produces the size of x:
/*
 * *x - size of x.  The operand is in Arg1; the integer result is left
 * in Arg0.
 */
OpDcl(size, 1, "*")
   {
   char sbuf[MaxCvtLen];

   Arg0.dword = D_Integer;
   if (Qual(Arg1)) {
      /*
       * If Arg1 is a string, return the length of the string.
       */
      IntVal(Arg0) = StrLen(Arg1);
      }
   else {
      /*
       * Arg1 is not a string.  For most types, the size is in the size
       *  field of the block structure.
       */
      switch (Type(Arg1)) {
         case T_List:
            IntVal(Arg0) = BlkLoc(Arg1)->list.size;
            break;

         case T_Table:
            IntVal(Arg0) = BlkLoc(Arg1)->table.size;
            break;

         case T_Set:
            IntVal(Arg0) = BlkLoc(Arg1)->set.size;
            break;

         case T_Cset:
            IntVal(Arg0) = BlkLoc(Arg1)->cset.size;
            break;

         ...

         default:
            /*
             * Try to convert it to a string.
             */
            if (cvstr(&Arg1, sbuf) == CvtFail)
               runerr(112, &Arg1);   /* no notion of size */
            IntVal(Arg0) = StrLen(Arg1);
         }
      }
   Return;
   }

OpDcl is a macro that performs several operations. One of these operations is to provide a C function declaration. Since the function is called by the interpreter, the header is somewhat different from what it would be if size were called directly. The details are described in Chapter 8. By convention, the arguments of the Icon operation are referred to via Arg1, Arg2, ... . The result that is produced for an operator is left in Arg0 rather than being given as an argument of return. Thus, in the case of *x, the value of x is in Arg1 and the returned size is placed in Arg0.

53

First, the d-word of Arg0 is set to D_Integer, since the returned value is an integer. Next, there is a test to determine if Arg1 is a qualifier. Qual is a macro that is defined as
/* Qual(d): nonzero when the d-word of descriptor d does not have the F_Nqual flag set. */
#define Qual(d) (!((d).dword & F_Nqual))

If Arg1 is a qualifier, its length is placed in the v-word of Arg0, using the macros IntVal and StrLen, which are defined as
/* IntVal(d): the integer held in the v-word of descriptor d. */
#define IntVal(d)   ((d).vword.integr)
/* StrLen(d): the d-word of descriptor d, which for a qualifier is the string length. */
#define StrLen(d)   ((d).dword)

If Arg1 is not a qualifier, then the size depends on the type. The macro Type isolates the type code
/* Type(d): the d-word of descriptor d masked with TypeMask, leaving the type code. */
#define Type(d) ((d).dword & TypeMask)

where the value of TypeMask is 63, providing considerable room for additions to Icon's 19 internal types. For most Icon types that are represented by blocks, their source-language size is contained in their size field. The macro BlkLoc accesses a pointer in the v-field of a descriptor and is defined as
/* Block pointer stored in the v-word of descriptor d (usable as an lvalue). */
#define BlkLoc(d) (((d).vword).bptr)

If the type is not one of these, the final task is an attempt to convert Arg1 to a string. The support routine cvstr does this, using the buffer sbuf provided by size. The value of Arg1 is changed accordingly; note that its address is provided to cvstr. A fixed-sized buffer can be used, since there is a limit to the size of a string that can be obtained by converting other types. This limit is 256, which is reached only for conversion of &cset. The conversion may fail, as for *&null, which is signalled by the return value CvtFail from cvstr. In this case, program execution is terminated with a run-time error message, using runerr. If the conversion is successful, the size is placed in the v-word of Arg0, as is the case if Arg1 was a qualifier originally. Note that the original test for a qualifier could be replaced by a call to cvstr, and the call to cvstr in the default of the switch statement could be eliminated. The code is written the way it is for efficiency, avoiding the call to cvstr in the common case that the argument is a string. It is worth noting that a special case is needed for strings, since a qualifier has no type code and a test for a string cannot be included in the switch statement. The macro Return returns from the function and signals the interpreter that a result has been produced.

RETROSPECTIVE: Descriptors provide a uniform way of representing Icon values and variables. Since descriptors for all types of data are the same size, there are no problems with assigning different types of values to a variable---they all fit. The importance of strings is reflected in the separation of descriptors into two classes---qualifiers and nonqualifiers---by the n flag. The advantages of the qualifier representation for strings are discussed in Chapter 5. It is comparatively easy to add a new type to Icon. A new type code is needed to distinguish it from other types. If the possible values of the new type are small enough to fit into the v-word, as is the case for integers, no other data is needed. For example, the value of a character data type could be contained in its descriptor. For types that have values that are too large to fit into a v-word, pointers to blocks containing the data are placed in the v-words instead. Lists, sets, and tables are examples of data types that are represented this way. See Chapters 6 and 7.

54

EXERCISES

4.1 Give examples of Icon programs in which heterogeneous aggregates are used in significant ways.

4.2 Design a system of type declarations for Icon so that the translator could do type checking. Give special consideration to aggregates, especially those that may change in size during program execution. Do this from two perspectives: (a) changing the semantics of Icon as little as possible, and (b) maximizing the type checking that can be done by the translator at the expense of flexibility in programming.

4.3 Suppose that functions in Icon were not first-class values and that their meanings were bound at translation time. How much could the translator do in the way of error checking?

4.4 Compile a list of all Icon functions and operators. Are there any that do not require argument type checking? Are there any that require type checking but not conversion? Identify those that are polymorphic. For the polymorphic ones, identify the different kinds of computations that are performed depending on the types of the arguments.

4.5 Compose a table of all type checks and conversions that are required for Icon functions and operators.

4.6 To what extent would the implementation of Icon be simplified if automatic type conversion were not supported? How would this affect the programmer?

4.7 Why is it desirable for string qualifiers not to have flags and for all other kinds of descriptors to have flags indicating they are not qualifiers, rather than the other way around?

4.8 Is the n flag that distinguishes string qualifiers from all other descriptors really necessary? If not, explain how to distinguish the different types of descriptors without this flag.

4.9 On computers with extremely limited address space, two-word descriptors may be impractically large. Describe how one-word descriptors might be designed, discuss how various types might be represented, and describe the ramifications for storage utilization and execution speed.

4.10 Identify the diagrams in this chapter that would be different if they were drawn for a computer with 16-bit words. Indicate the differences.

4.11 There is nothing in the nature of keywords that requires them to be processed in a special way for assignment but not for dereferencing. Invent a new keyword that is a variable that requires processing when it is dereferenced. Show how to generalize the keyword trapped-variable mechanism to handle such cases.

4.12 List all the syntactically distinct cases in which the translator can determine whether a keyword variable is used in an assignment or dereferencing context.

4.13 What would be gained if special code were compiled for those cases in which the context for keyword variables could be determined?

55

Chapter 5: Strings and Csets


PERSPECTIVE: Several aspects of strings as a language feature in Icon have a strong influence on how they are handled by the implementation. First of all, strings are the most frequently used type of data in the majority of Icon programs. The number of different strings and the total amount of string data often are large. Therefore, it is important to be able to store and access strings efficiently. Icon has many operations on strings---nearly fifty of them. Some operations, such as determining the size of a string, are performed frequently. The efficiency of these operations is an important issue and influences, to a considerable extent, how strings are represented. Icon strings may be very long. Although some limitation on the maximum length of a string may be acceptable as a compromise with the architecture of the computer on which Icon is implemented (and hence considerations of efficiency), this maximum must be so large as to be irrelevant for most Icon programs. String lengths are determined dynamically during program execution, instead of being specified statically in declarations. Much of the advantage of string processing in Icon over other programming languages comes from the automatic management of storage for strings. Any of the 256 8-bit ASCII characters can appear in an Icon string. Even the "null" character is allowed. Several operations in Icon return substrings of other strings. Substrings tend to occur frequently, especially in programs that analyze (as opposed to synthesize) strings. Strings in Icon are atomic---there are no operations in Icon that change the characters in existing strings. This aspect of Icon is not obvious; in fact, there are operations that appear to change the characters in strings. The atomic nature of string operations in Icon simplifies its implementation considerably. For example, assignment of a string value to a variable need not (and does not) copy the string. The order in which characters appear is an essential aspect of strings. There are many situations in Icon, however, where several characters have the same status but where their order is irrelevant. For example, the concepts of vowels and punctuation marks depend on set membership but not on order. Csets are provided for such situations. Interestingly, many computations can be performed using csets that have nothing to do with the characters themselves (Griswold and Griswold 1983, pp. 181-191).

5.1 Strings

5.1.1 Representation of Strings

Although it may appear natural for the characters of a string to be stored in consecutive bytes, this has not always been so. On earlier computer architectures without byte addressing and character operations, some string-manipulation languages represented strings by linked lists of words, each word containing a single character. Such a representation seems bizarre for modern computer architectures and obviously consumes a very large amount of memory---an intolerable amount for a language like Icon.

The C programming language represents strings (really arrays of characters) by successive bytes in memory, using a zero (null) byte to indicate the end of a string. Consequently, the end of a string can be determined from the string itself, without any external information. On the other hand, determining the length of a string, if it is not already known, requires indexing through it, incrementing a counter until a null byte is found. Furthermore, and very important for a language like Icon, substrings (except terminal ones) cannot occur within strings, since every C string must end with a null byte. Since any character can occur in an Icon string, it is not possible to use C's null-termination approach to mark ends of strings. Therefore, there is no way to detect the end of a string from the string itself, and there must be some external way to determine where a string ends. This consideration provides the motivation for the qualifier representation described in the last chapter. The qualifier provides information, external to the string itself, that delimits the string by the address of its first character and its length. Such a representation makes the computation of substrings fast and simple---and, of course, determining the length of a string is fast and independent of its length. Note that C-style strings serve perfectly well as Icon-style strings; the null byte at the end of a C-style string can be ignored by Icon. This allows strings produced by C functions to be used by Icon. The converse is not true; in order for an Icon string to be used by C, a copy must be made with a null byte appended at the end. Some strings are compiled into the run-time system and others, such as strings that appear as literals in a program, are contained in icode files that are loaded into memory when program execution begins. During program execution, Icon strings may be stored in work areas (usually referred to as "buffers"). Most newly created strings, however, are allocated in a common string region. As source-language operations construct new strings, their characters are appended to the end of those already in the string region. The amount of space allocated in the string region typically increases during program execution until the region is full, at which point it is compacted by garbage collection, squeezing out characters that are no longer needed. See Chapter 11 for details. In the previous chapter, the string to which a qualifier points is depicted by an arrow followed by the string. For example, the string "the" is represented by the qualifier

The pointer to "the" is just a notational convenience. A more accurate representation is

The actual value of the v-word might be 0x569a (hexadecimal), where the character t is at memory location 0x569a, the character h is at location 0x569b, and the character e is at location 0x569c.

5.1.2 Concatenation

In an expression such as
s := "hello"

the string "hello" is contained in data provided as part of the icode file, and a qualifier for it is assigned to s; no string is constructed. Some operations that produce strings require the allocation of new strings. Concatenation is a typical example:

5!

s1 := "ab" || "cdef"

In this expression, the concatenation operation allocates space for six characters, copies the two strings into this space, and produces a qualifier for the result:

This qualifier then becomes the value of s1. There is one important optimization in concatenation. If the first argument in a concatenation is the last string in the string region, the second argument is simply appended to the end of the string region. Thus, operations of the form
s := s || expr

perform less allocation than operations of the form


s := expr || s

Except for this optimization, no string construction operation attempts to use another instance of a string that may exist somewhere else in the string region. As a result,
s1 := "ab" || "c"
s2 := "a" || "bc"

produce two distinct strings:

The C code for the concatenation operation is


/*
 * x || y - concatenate strings x and y.
 *
 * The operands arrive in Arg1 and Arg2; the resulting qualifier is left
 * in Arg0.  If Arg1 already ends at the end of the allocated string
 * region, only Arg2 is copied.
 */
OpDcl(cat, 2, "||")
   {
   char sbuf1[MaxCvtLen];       /* buffers for conversion */
   char sbuf2[MaxCvtLen];       /*   to string */
   extern char *alcstr();

   /*
    * Convert arguments to strings if necessary.
    */
   if (cvstr(&Arg1, sbuf1) == CvtFail)
      runerr(103, &Arg1);
   if (cvstr(&Arg2, sbuf2) == CvtFail)
      runerr(103, &Arg2);

   /*
    * Ensure space for the resulting string.  This is done before any
    *  allocation so that both alcstr calls below are contiguous.
    */
   strreq(StrLen(Arg1) + StrLen(Arg2));

   if (StrLoc(Arg1) + StrLen(Arg1) == strfree)
      /*
       * The end of Arg1 is at the end of the allocated string
       *  space.  Hence, Arg1 was the last string allocated.
       *  Arg1 is not copied.  Instead, Arg2 is appended to the
       *  string space and the result is pointed to the start
       *  of Arg1.
       */
      StrLoc(Arg0) = StrLoc(Arg1);
   else
      /*
       * Otherwise, append Arg1 to the end of the allocated
       *  string space and point the result to the start of
       *  the new copy of Arg1.
       */
      StrLoc(Arg0) = alcstr(StrLoc(Arg1), StrLen(Arg1));

   /*
    * Append Arg2 to the end.
    */
   alcstr(StrLoc(Arg2), StrLen(Arg2));

   /*
    * Set the length of the result and return.
    */
   StrLen(Arg0) = StrLen(Arg1) + StrLen(Arg2);
   Return;
   }

The function strreq(n) assures that there are at least n bytes available in the allocated string region. See Chapter 11 for details. The function alcstr(s, n) allocates n characters and copies s to that space. The global variable strfree points to the beginning of the free space at the end of the allocated string region.

5.1.3 Substrings

Many string operations do not require the allocation of a new string but only produce new qualifiers. For example, if the value of s1 is "abcdef", the substring formed by
s2 := s1[3:6]

does not allocate a new string but only produces a qualifier that points to a substring of s1:

In order for Icon string values to be represented in memory by substrings, it is essential that there be no Icon operation that changes the characters inside a string. As mentioned earlier, this is the case, although it is not obvious from a cursory examination of the language. C, on the other hand, allows the characters in a string to be changed. The difference is that C considers a string to be an array of characters and allows assignment to the elements of the array, while Icon considers a string to be an indivisible atomic object. It makes no more sense in Icon to try to change a character in a string than it does to try to change a digit in an integer. Thus, if
i := j

and
j := j + 1

the value of i does not change as a result of the subsequent assignment to j. So it is with strings in Icon. Admittedly, there are operations in Icon that appear to change the characters in a string. For example,
s1[3] := "x"

gives the appearance of changing the third character in s1 to "x". However, this expression is simply shorthand for
s1 := s1[1:3] || "x" || s1[4:0]

A new string is created by concatenation and a new qualifier for it is assigned to s1, as shown by

5#

Of course, the length of the string may be increased or decreased by assignment to a substring, as in
s1[3] := "xxx"
s1[2:5] := ""

5.1.4 Assignment to Subscripted Strings

Expressions such as x[i] and x[i:j] represent a particular challenge in the implementation of Icon. In the first place, the translator cannot determine the type of x. In the case of x[i], there are four basic types that x may legitimately have: string, list, table, and record. Of course, any type that can be converted to a string is legitimate also. Unfortunately, the nature of the operation, not just the details of its implementation, depends on the type. For strings,
s1 [3] := s2

replaces the third character of s1 by s2 and is equivalent to concatenation, as described previously. For lists, tables, and records,
x[3] := y

changes the third element of x to y---quite a different matter (see Exercise 5.5). This problem is pervasive in Icon and only needs to be noted in passing here. The more serious problem is that even if the subscripted variable is a string, the subscripting expression has different meanings, depending on the context in which it appears. If s is a variable, then s[i] and s[i:j] also are variables. In a dereferencing context, such as
write(s[2:5])

the result produced by s[2:5] is simply a substring of s, and the subscripting expression produces the appropriate qualifier. Assignment to a subscripted string, as in
s[2:5] := "xxx"

is not at all what it appears to be superficially. Instead, as already noted, it is shorthand for an assignment to s:
s := s[1] || "xxx" || s[5:0]

If the translator could determine whether a subscripting expression is used in dereferencing or assignment context, it could produce different code for the two cases. As mentioned in Sec. 4.3.2, however, the translator cannot always make this determination. Consequently, trapped variables are used for subscripted strings much in the way they are used for keywords. For example, if the value of s is "abcdef", the result of evaluating the subscripting expression s[2:5] is a substring trapped variable that has the form

&

Note that both the variable for s and the variable in the substring trapped-variable block point to the same value. This makes it possible for assignment to the substring trapped variable to change the value of s. The length and offset of the substring provide the necessary information either to produce a qualifier for the substring, in case the subscripting expression is dereferenced, or to construct a new string in case an assignment is made to the subscripting expression. For example, after an assignment such as
s[2:5] := "x"

the situation is

Note that the value of s has changed. The length of the subscripted portion of the string has been changed to correspond to the length of the string assigned to it. This reflects the fact that subscripting identifies the portions of the string before and after the subscripted portion ("a" and "ef", in this case). In the case of a multiple assignment to a subscripted string, only the original subscripted portion is changed. Thus, in
(s[2:5] := "x") := "yyyyy"

the final value of s is "ayyyyyef".


5.1.5 Mapping

String mapping is interesting in its own right, and the C function that implements it illustrates several aspects of string processing:
/*
 * map(s1,s2,s3) - map s1, using s2 and s3.
 *
 * Arguments arrive in Arg1..Arg3 and the result is left in Arg0.
 * The translation table is kept in a static array and is rebuilt only
 * when Arg2 or Arg3 differ from the values cached in maps2 and maps3.
 */
FncDcl(map,3)
   {
   register int i;
   register word slen;
   register char *s1, *s2, *s3;
   char sbuf1[MaxCvtLen], sbuf2[MaxCvtLen], sbuf3[MaxCvtLen];
   static char maptab[256];
   extern char *alcstr();

   /*
    * Arg1 must be a string; Arg2 and Arg3 default to &ucase
    *  and &lcase respectively.
    */
   if (cvstr(&Arg1, sbuf1) == CvtFail)
      runerr(103, &Arg1);
   if (ChkNull(Arg2))
      Arg2 = ucase;
   if (ChkNull(Arg3))
      Arg3 = lcase;

   /*
    * If Arg2 and Arg3 are the same as for the last call of map,
    *  the current values in maptab can be used.  Otherwise, the
    *  mapping information must be recomputed.
    */
   if (!EqlDesc(maps2,Arg2) || !EqlDesc(maps3,Arg3)) {
      maps2 = Arg2;
      maps3 = Arg3;

      /*
       * Convert Arg2 and Arg3 to strings.  They must be of the
       *  same length.
       */
      if (cvstr(&Arg2, sbuf2) == CvtFail)
         runerr(103, &Arg2);
      if (cvstr(&Arg3, sbuf3) == CvtFail)
         runerr(103, &Arg3);
      if (StrLen(Arg2) != StrLen(Arg3))
         runerr(208, NULL);

      /*
       * The array maptab is used to perform the mapping.  First,
       *  maptab[i] is initialized with i for i from 0 to 255.
       *  Then, for each character in Arg2, the position in maptab
       *  corresponding to the value of the character is assigned
       *  the value of the character in Arg3 that is in the same
       *  position as the character from Arg2.
       */
      s2 = StrLoc(Arg2);
      s3 = StrLoc(Arg3);
      for (i = 0; i <= 255; i++)
         maptab[i] = i;
      for (slen = 0; slen < StrLen(Arg2); slen++)
         maptab[s2[slen]&0377] = s3[slen];   /* mask keeps the index in 0..255 */
      }

   if (StrLen(Arg1) == 0) {
      Arg0 = emptystr;
      Return;
      }

   /*
    * The result is a string the size of Arg1;
    *  ensure that much space.
    */
   slen = StrLen(Arg1);
   strreq(slen);
   s1 = StrLoc(Arg1);

   /*
    * Create the result string, but specify no value for it.
    */
   StrLen(Arg0) = slen;
   StrLoc(Arg0) = alcstr(NULL, slen);
   s2 = StrLoc(Arg0);

   /*
    * Run through the string, using values in maptab to do the
    *  mapping.
    */
   while (slen-- > 0)
      *s2++ = maptab[(*s1++)&0377];
   Return;
   }

The mapping is done using the character array maptab. This array is set up by first assigning every possible character to its own position in maptab and then replacing the characters at positions corresponding to characters in s2 by the corresponding characters in s3. Note that if a character occurs more than once in s2, its last (rightmost) correspondence with a character in s3 applies. To avoid rebuilding maptab unnecessarily, this step is bypassed if map is called with the same values of s2 and s3 as in the previous call. The global variables maps2 and maps3 are used to hold these "cached" values. The macro EqlDesc(d1,d2) tests the equivalence of the descriptors d1 and d2. The function map is an example of a function that defaults null-valued arguments. Omitted arguments are supplied as null values. The defaults for s2 and s3 are &ucase and &lcase, respectively. Consequently,
map(s)

is equivalent to
map(s, &ucase, &lcase)

The macro ChkNull(d) tests whether or not d is null. The values of &ucase and &lcase are in the global variables ucase and lcase.

5.2 Csets

Since Icon uses 8-bit characters, regardless of the computer on which it is implemented, there are 256 different characters that can occur in csets. A cset block consists of the usual title containing the cset type code followed by a word that contains the number of characters in the cset. Next, there are words containing a total of 256 bits. Each bit represents one character, with a bit value of 1 indicating that the character is present in the cset and a bit value of 0 indicating it is absent. An example is the value of the keyword &ascii:

The first 128 bits are 1, since these are the bits that correspond to those in the ASCII character set. The C structure for a cset block is
struct b_cset { /* cset block */

word title; /* word size; /* int bits [CsetSize]; /* };

T_Cset */ size of cset */ array of bits */

where CsetSize is the number of words required to make up a total of 256 bits. CsetSize is 8 on a computer with 32-bit words and 16 on a computer with 16-bit words. Cset operations are comparatively straightforward. The characters in a cset are represented by a bit vector that is divided into words to accommodate conventional computer architectures. For example, the C code for cset complementation is
/*
 * ~x - complement cset x.
 *
 * The operand arrives in Arg1; a descriptor for a newly allocated cset
 * block is left in Arg0.
 */
OpDcl(compl, 1, "~")
   {
   register int i, j;
   union block *bp;
   int *cs, csbuf[CsetSize];
   extern struct b_cset *alccset();

   /*
    * Ensure space for the new cset block before anything is allocated.
    */
   blkreq((word)sizeof(struct b_cset));

   /*
    * Arg1 must be a cset.
    */
   if (cvcset(&Arg1, &cs, csbuf) == CvtFail)
      runerr(104, &Arg1);

   /*
    * Allocate a new cset and then copy each cset word from Arg1
    *  into the new cset words, complementing each bit.
    */
   bp = (union block *)alccset(0);
   for (i = 0; i < CsetSize; i++) {
      bp->cset.bits[i] = ~cs[i];
      }

   /*
    * Count the bits that are set to obtain the size of the new cset.
    */
   j = 0;
   for (i = 0; i < CsetSize * IntSize; i++) {
      if (Testb(i, bp->cset.bits))
         j++;
      }
   bp->cset.size = j;

   Arg0.dword = D_Cset;
   BlkLoc(Arg0) = bp;
   Return;
   }

The macro Testb(b, c) tests bit b in cset c.

RETROSPECTIVE: The central role of strings in Icon and the nature of the operations performed on them leads to a representation of string data that is distinct from other data. The qualifier representation is particularly important in providing direct access to string length and in allowing the construction of substrings without the allocation of additional storage. The penalty paid is that a separate test must be performed to distinguish strings from all other kinds of values. The ability to assign to subscripted strings causes serious implementation problems. The trapped-variable mechanism provides a solution, but it does so at considerable expense in the complexity of code in the run-time system as well as storage allocation for trapped-variable blocks. This expense is incurred even if assignment is not made to a subscripted string.

EXERCISES

5.1 What are the ramifications of Icon's use of the 8-bit ASCII character set (256 characters), regardless of the "native" character set of the computer on which Icon is implemented?

5.2 Catalog all the operations on strings in Icon and point out any that might cause special implementation problems. Indicate the aspects of strings and string operations in Icon that are the most important in terms of memory requirements and processing speed.

5.3 List all the operations in Icon that require the allocation of space for the construction of strings.

5.4 It has been suggested that it would be worth trying to avoid duplicate allocation of the same string by searching the string region for a newly created string to see if it already exists before allocating the space for it. Evaluate this proposal.

5.5 Consider the following four expressions:

s1[i] := s2
s1[i+:1] := s2
a1[i] := a2
a1[i+:1] := a2

where s1 and s2 have string values and a1 and a2 have list values. Describe the essential differences between the string and list cases. Explain why these differences indicate flaws in language design. Suggest an alternative.

5.6 The substring trapped-variable concept has the advantage of making it possible to handle all the contexts in which string-subscripting expressions can occur. It is expensive, however, in terms of storage utilization. Analyze the impact of this feature on the performance of "typical" Icon programs.

5.7 Since the contexts in which most subscripting expressions occur can be determined, describe how to handle these without using trapped variables.

5.8 If a subscripting expression is applied to a result that is not a variable, it is erroneous to use such an expression in an assignment context. In what situations can the translator detect this error? Are there any situations in which a subscripting expression is applied to a variable but in which the expression cannot be used in an assignment context?

5.9 There are some potential advantages to unifying the keyword and substring trapped-variable mechanisms into a single mechanism in which all trapped variables would have pointers to functions for dereferencing and assignment. What are the disadvantages of such a unification?

5.10 Presumably, it is unlikely for a programmer to have a constructive need for the polymorphic aspect of subscripting expressions. Or is it? If it is unlikely, provide a supporting argument. On the other hand, if there are situations in which this capability is useful, describe them and give examples.

5.11 In some uses of map(s1, s2, s3), s1 and s2 remain fixed while s3 varies (Griswold 1980b). Devise a heuristic that takes advantage of such usage.

Chapter 6: Lists

PERSPECTIVE: Most programming languages support some form of vector or array data type in which elements can be referenced by position. Icon's list data type fills this need, but it differs from similar types in many languages in that Icon lists are constructed during program execution instead of being declared during compilation. Therefore, the size of a list may not be known until run time. Icon's lists are data objects. They can be assigned to variables and passed as arguments to functions. They are not copied when this is done; in fact, a value of type list is simply a descriptor that points to the structure that contains the list elements. These aspects of lists are shared by several other Icon data types and do not add anything new to the implementation. The attribute of lists that presents the most challenging implementation problem is their ability to grow and shrink by the use of stack and queue access mechanisms. Lists present different faces to the programmer, depending on how they are used. They may be static vectors referenced by position or they may be dynamic changing stacks or queues. It might seem that having a data structure with such apparently discordant access mechanisms would be awkward and undesirable. In practice, Icon's lists provide a remarkably flexible mechanism for dealing with many common programming problems. The two ways of manipulating lists are rarely intermixed. When both aspects are needed, they usually are needed at different times. For example, the number of elements needed in a list often is not known when the list is created. Such a list can be created with no elements, and the elements can be pushed onto it as they are produced. Once such a list has been constructed, it may be accessed by position with no further change in its size.

6.1 Structures for Lists


The fusion of vector, stack, and queue organizations is reflected in the implementation of Icon by relatively complicated structures that are designed to provide a reasonable compromise between the conflicting requirements of the different access mechanisms. A list consists of a fixed-size list-header block, which contains the usual title, the current size of the list (the number of elements in it), and descriptors that point to the first and last blocks on a doubly-linked chain of list-element blocks that contain the actual list elements. List-element blocks vary in size. A list-element block contains the usual title, the size of the block in bytes, three words used to determine the locations of elements in the list-element block, and descriptors that point to the next and previous list-element blocks, if any. A null descriptor indicates the absence of a pointer to another list-element block. Following this data, there are slots for elements. Slots always contain valid descriptors, even if they are not used to hold list elements. The structure declarations for list-header blocks and list-element blocks are
/*
 * List-header block: one per list.  Holds the current element count and
 *  descriptors for both ends of the chain of list-element blocks.
 */
struct b_list {                 /* list-header block */
   word title;                  /*   T_List */
   word size;                   /*   current list size */
   struct descrip listhead;     /*   pointer to first list-element block */
   struct descrip listtail;     /*   pointer to last list-element block */
   };

/*
 * List-element block: holds a run of list elements in a circular buffer.
 *  first and nused locate the elements that are in use among the nslots
 *  slots.  lslots is declared with one element; the code that requests
 *  space for a block adds room for the remaining nslots - 1 descriptors.
 */
struct b_lelem {                /* list-element block */
   word title;                  /*   T_Lelem */
   word blksize;                /*   size of block */
   word nslots;                 /*   total number of slots */
   word first;                  /*   index of first used slot */
   word nused;                  /*   number of used slots */
   struct descrip listprev;     /*   previous list-element block */
   struct descrip listnext;     /*   next list-element block */
   struct descrip lslots[1];    /*   array of slots */
   };

When a list is created, either by


list(n, x)

or by
[x1 ,x2, ..., xn]

there is only one list-element block. Other list-element blocks may be added to the chain as the result of pushes or puts. List-element blocks have a minimum number of slots. This allows some expansion room for adding elements to lists, such as the empty list, that are small initially. The minimum number of slots is given by MinListSlots, which normally is eight. In the examples that follow, the value of MinListSlots is assumed to be four in order to keep the diagrams to a manageable size. The code for the list function is
/*
 * list(n, x) - create a list of n elements, each initialized to x.
 *
 *  Arg1 is the requested size (defaults to 0) and Arg2 is the initial
 *  value stored in every element.  The result, a descriptor for the new
 *  list, is returned in Arg0.  A negative size is run-time error 205.
 */
FncDcl(list, 2)
   {
   register word i, size;
   word nslots;
   register struct b_lelem *bp;
   register struct b_list *hp;
   extern struct b_list *alclist();
   extern struct b_lelem *alclstb();

   defshort(&Arg1, 0);                  /* size defaults to 0 */

   nslots = size = IntVal(Arg1);

   /*
    * Ensure that the size is nonnegative and that the list-element
    *  block has at least MinListSlots slots.
    */
   if (size < 0)
      runerr(205, &Arg1);
   if (nslots < MinListSlots)
      nslots = MinListSlots;

   /*
    * Ensure space for a list-header block, and a list-element block
    *  with nslots slots.  struct b_lelem already declares one slot,
    *  so only nslots - 1 additional descriptors are needed.
    */
   blkreq(sizeof(struct b_list) + sizeof(struct b_lelem) +
      (nslots - 1) * sizeof(struct descrip));

   /*
    * Allocate the list-header block and a list-element block.
    *  Note that nslots is the number of slots in the list-element
    *  block while size is the number of elements in the list.
    */
   hp = alclist(size);
   bp = alclstb(nslots, (word)0, size);
   hp->listhead.dword = hp->listtail.dword = D_Lelem;
   BlkLoc(hp->listhead) = BlkLoc(hp->listtail) = (union block *)bp;

   /*
    * Initialize each slot.
    */
   for (i = 0; i < size; i++)
      bp->lslots[i] = Arg2;

   /*
    * Return the new list.
    */
   Arg0.dword = D_List;
   BlkLoc(Arg0) = (union block *)hp;
   Return;
   }

The data structures produced for a list are illustrated by the result of evaluating
a := list(1, 4)

which produces a one-element list containing the value 4:

Data Structures for list(1,4)

Note that there is only one list-element block and that the slot indexing in the block is zero-based. Unused slots contain null values that are logically inaccessible.

"

6.2 Queue and Stack Access


Elements in a list-element block are stored as a doubly-linked circular queue. If an element is added to the end of the list a, as in
put(a, 5)

the elements of the list are 4 and 5. The value is added to the "end" of the last list-element block, assuming there is an unused slot (as there is in this case). The code in put to do this is
/* * Point hp to the list-header block and bp to the last * list-element block. */ hp = (struct b_list *)BlkLoc(Arg1); bp = (struct b_lelem *)BlkLoc(hp->listtail); /* * If the last list-element block is full, allocate a new * list-element block, make it the first list-element block, * and make it the next block of the former last list-element * block. */ if (bp->nused >= bp->nslots) { bp = alclstb((word)MinListSlots, (word)0, (word)0); BlkLoc(hp->listtail)->lelem.listnext.dword = D_Lelem; BlkLoc(BlkLoc(hp->listtail)->lelem.listnext) = (union block *)bp; bp->listprev = hp->listtail; BlkLoc(hp->listtail) = (union block *)bp; } /* * Set i to position of new last element and assign Arg2 to * that element. */ i = bp->first + bp->nused; if (i >= bp->nslots) i -= bp->nslots; bp->lslots[i] = Arg2:
/* Adjust block usage count and current 12

bp->nused++; hp->size++; /* * Return the list. */ Arg0 = Arg1; Return; }

The effect on the list-header block and list-element block is:

Note the increase in the number of elements in the header block and in the number of slots used in the list-element block. If an element is added to the beginning of a list, as in
push(a,3)

the elements of the list are 3, 4, and 5. The new element is put at the "beginning" of the first list-element block. The result is

!&

The List-Element Block after a push

Note that the "beginning," which is before the first physical slot in the list-element block, is the last physical slot. The locations of elements that are in a list-element block are determined by the three integers at the head of the list-element block. "Removal" of an element by a pop, get, or pull does not shorten the list-element block or overwrite the element; the element merely becomes inaccessible. If an element is added to a list and no more slots are available in the appropriate list-element block, a new list-element block is allocated and linked in. For example, following evaluation of
push(a, 2)
push(a, 1)

the list elements are 1, 2, 3, 4, and 5. The resulting structures are

!1

The Addition of a List-Element Block

As elements are removed from a list by pop (which is synonymous with get) or pull, the indices in the appropriate list-element block are adjusted. The code for pop is
/*
 * pop(a) - remove and return the first element of the list a.
 *
 *  Arg1 must be a list (run-time error 108 otherwise).  The function
 *  fails if the list is empty.  The removed element is returned in Arg0.
 *  The element is not overwritten in its slot; the block's indices are
 *  simply adjusted so that it is no longer counted.
 */
FncDcl(pop, 1)
   {
   register word i;
   register struct b_list *hp;
   register struct b_lelem *bp;
   extern struct b_lelem *alclstb();

   /*
    * Arg1 must be a list.
    */
   if (Arg1.dword != D_List)
      runerr(108, &Arg1);

   /*
    * Fail if the list is empty.
    */
   hp = (struct b_list *)BlkLoc(Arg1);
   if (hp->size <= 0)
      Fail;

   /*
    * Point bp to the first list-element block.  If the first
    *  block has no slots in use, point bp at the next
    *  list-element block.  The empty block is unlinked here by
    *  making its successor the new head of the chain.
    */
   bp = (struct b_lelem *)BlkLoc(hp->listhead);
   if (bp->nused <= 0) {
      bp = (struct b_lelem *)BlkLoc(bp->listnext);
      BlkLoc(hp->listhead) = (union block *)bp;
      bp->listprev = nulldesc;
      }

   /*
    * Locate first element and assign it to Arg0 for return.
    */
   i = bp->first;
   Arg0 = bp->lslots[i];

   /*
    * Set bp->first to new first element, or 0 if the block is
    *  now empty.  Decrement the usage count for the block and the
    *  size of the list.
    */
   if (++i >= bp->nslots)
      i = 0;
   bp->first = i;
   bp->nused--;
   hp->size--;
   Return;
   }

Thus, as a result of
pop(a)

the list elements are 2, 3, 4, and 5. The resulting structures are

!3

The Result of Removing Elements from a List-Element Block

Note that the first list-element block is still linked in the chain, even though it no longer contains any elements that are logically accessible. A list-element block is not removed from the chain when it becomes empty. It is removed only when an element is removed from a list that already has an empty list-element block. Thus, there is always at least one list-element block on the chain, even if the list is empty. Aside from simplifying the access to list-element blocks from the list-header block, this strategy avoids repeated allocation in the case that pop/push pairs occur at the boundary of two list-element blocks. Continuing the previous example,
pop(a)

!4

leaves the list elements 3, 4, and 5. The empty list-element block is removed from the chain:

Removal of an Empty List-Element Block

Note that the value 2 is still physically in the list-element block, although it is logically inaccessible.

6.3 Positional Access


Positional reference of the form a[i] requires locating the correct list-element block. Out-of-range references can be determined by examining the list-header block. If the list has several list-element blocks, this involves linking through the list-element blocks, while keeping track of the count of elements in each block until the appropriate one is reached. The result of evaluating a[i] is a variable that points to the appropriate slot. The portion of the subscripting code that handles lists is
/*
 * Excerpt from the subscripting operation: the case for Arg1[Arg2]
 *  where Arg1 is a list.  Produces in Arg0 a variable descriptor that
 *  points to the selected slot.  Fails if the subscript is out of range.
 */
switch (Type(Arg1)) {
   case T_List:
      /*
       * Make sure that Arg2 is an integer and that the
       *  subscript is in range.
       */
      if (cvint(&Arg2, &l1) == CvtFail)
         runerr(101, &Arg2);
      i = cvpos(l1, BlkLoc(Arg1)->list.size);
      if (i == 0 || i > BlkLoc(Arg1)->list.size)
         Fail;

      /*
       * Locate the list-element block containing the desired
       *  element.  j is the list position of the first element in
       *  the block that bp points to.
       */
      bp = BlkLoc(BlkLoc(Arg1)->list.listhead);
      j = 1;
      while (i >= j + bp->lelem.nused) {
         j += bp->lelem.nused;
         bp = BlkLoc(bp->lelem.listnext);
         }

      /*
       * Locate the desired element and return a pointer to it.
       *  The slot index wraps around, since the slots of a block
       *  are used circularly.
       */
      i += bp->lelem.first - j;
      if (i >= bp->lelem.nslots)
         i -= bp->lelem.nslots;
      dp = &bp->lelem.lslots[i];
      /* The d-word carries the offset of the slot from the start of its block. */
      Arg0.dword = D_Var + ((int *)dp - (int *)bp);
      VarLoc(Arg0) = dp;
      Return;

For the preceding example, a[3] produces a variable that points to the descriptor for the value 5:

Referencing a List Element

Note the offset of eleven words in the d-word of the variable. This is present so that the title of the block to which the variable points can be located in case there is a garbage collection. See Chapter 11 for details. RETROSPECTIVE: The structures used for implementing lists are relatively complicated, but they provide a reasonable compromise, both in the utilization of storage and access speed, that accommodates different access mechanisms. Using a chain of list-element blocks allows lists to grow in size without limit. From the viewpoint of positional access, this amounts to segmentation. This segmentation only occurs, however, when elements are added to a list. The use of circular queues within list-element blocks allows elements to be removed and added without wasting space.

EXERCISES 6.1 Diagram the structures that result from the evaluation of the following expressions:
graph := ["a",,]
graph[2] := graph[3] := graph

6.2 How much space does an empty list occupy? 6.3 The portions of the structures for a list that are not occupied by elements of the list constitute overhead. Calculate the percentage of overhead in the following lists. Assume that the minimum number of slots in a list-element block is eight.
a := []
a := [1, 2]
a := [1, 2, 3, 4, 5]
a := list(100)
a := []; every put(a, 1 to 100)

How do these figures vary as a function of the minimum number of slots in a list-element block? 6.4 What are the implications of not "zeroing" list elements when they are logically removed by a pop, get, or pull? 6.5 When a list-element block is unlinked as the result of a pop, get, or pull, are the elements in it really inaccessible to the source program? 6.6 There is considerable overhead involved in the implementation of lists to support both positional access and stack and queue access mechanisms. Suppose the language were changed so that stack and queue access mechanisms applied only to lists that were initially empty. What would the likely impact be on existing Icon programs? How could the implementation take advantage of this change? 6.7 As elements are added to lists, more list-element blocks are added and they tend to become "fragmented." Is it feasible to reorganize such lists, combining the elements in many list-element blocks into one large block? If so, when and how could this be done? 6.8 A suggested alternative to maintaining a chain of list-element blocks is to allocate a larger block when space is needed and copy elements from the previous block into it. Criticize this proposal. 6.9 Suppose it were possible to insert elements in the middle of lists, rather than only at the ends. How might this feature be implemented?

!!

Chapter 7: Sets and Tables


PERSPECTIVE: Sets and tables are data aggregates that are very useful for a number of common programming tasks. Nevertheless, few programming languages support these data types, with the notable exceptions of Sail (Reiser 1976) and SETL (Dewar, Schonberg, and Schwartz 1981). There are many reasons why these obviously useful data types are not found in most programming languages, but perceived implementation problems certainly rank high among them. If only for this reason, their implementation in Icon is worth studying. Historically, tables in Icon were inherited from SNOBOL4 and SL5. Sets came later, as an extension to Icon, and were designed and implemented as a class project. Although sets were a late addition to Icon, they are simpler than tables. Nonetheless, they present many of the same implementation problems that tables do. Consequently, sets are considered here first. Sets and the operations on them support the familiar mathematical concepts of finite sets: membership, the insertion and deletion of members, and the operations of union, intersection, and difference. What is interesting about a set in Icon is that it can contain members of any data type. This is certainly a case where heterogeneity significantly increases the usefulness of a data aggregate without adding to the difficulty of the implementation, per se.
The ability of a set to grow and shrink in size influences the implementation significantly. Efficient access to members of a set, which is needed for testing membership as well as the addition and deletion of members, is an important consideration, since sets can be arbitrarily large. Tables have more structure than sets. Abstractly, a table is a set of pairs that represents a many-to-one relationship—a function. In this sense, the default value of a table provides an extension of the partial function represented by the entry and assigned value pairs to a complete function over all possible entry values. Programmers, however, tend to view tables in a more restricted way, using them to tabulate the attributes of a set of values of interest. In fact, before sets were added to Icon, tables were often used to simulate sets by associating a specific assigned value with membership.

7.1 Sets
7.1.1 Data Organization for Sets

Hash lookup and linked lists are used to provide an efficient way of locating set members. For every set there is a set-header block that contains a word for the number of members in the set and slots that serve as heads for (possibly empty) linked lists of set-element blocks. The number of slots is an implementation parameter. There are thirty-seven slots in table-header blocks on computers with large address spaces but only thirteen slots on computers with small address spaces. The structure for an empty set, produced by
s := set([])

is

!"

Each member of a set is contained in a separate set-element block. When a value is looked up in a set (for example, to add a new member), a hash number is computed from this value. The absolute value of the remainder resulting from dividing the hash number by the number of slots is used to select a slot. Each set-element block contains a descriptor for its value, the corresponding hash number, and a pointer to the next set-element block, if any, on the linked list. For example, the set-element block for the integer 39 is:

As illustrated by this figure, the hash number for an integer is just the value of the integer. This member goes in slot 2 on computers with large address spaces, since its remainder on division by the number of slots is two. Hash computation is discussed in detail in Sec. 7.3. The structures for the set
s := set([39,2])

are

!#

This example was chosen for illustration, since both 2 and 39 go in slot 2. In searching the list, the hash number of the value being looked up is compared with the hash numbers in the set-element blocks. If a match is found, the value in the set-element block may or may not be the same as the value being looked up, since collisions in the hash computation are unavoidable. Thus, if the hash numbers are the same, it is necessary to determine whether or not their values are equivalent. The comparison that is used is the same one that is used by the source-language operation x === y. To improve the performance of the lookup process, the set-element blocks in each linked list are ordered by their hash numbers. When a linked list of set-element blocks is examined, the search stops if a hash number of an element on the list is greater than the hash number of the value being looked up. If the value is not found and the lookup is being performed to insert a new member, a set-element block for the new member is created and linked into the list at that point. For example,
insert(s, -39)

inserts a set-element block for -39 at the head of the list in slot 2, since its hash value is -39. The word in the set-header block that contains the number of members is incremented to reflect the insertion.

7.1.2 Set Operations

The set operations of union, intersection, and difference all produce new sets and do not modify their arguments. In the case of union, a copy of the larger set is made first to provide the basis for the union. This involves not only copying the set-header block but also all of its set-element blocks. These are linked together as in the original set, and no lookup is required. After this copy is made, each member of the set for the other argument is inserted in the copy, using the same technique that is used in insert. The larger set is copied, since copying does

"&

not require lookup and the possible comparison of values that insertion does. The insertion of a member from the second set may take longer, however, since the linked lists in the copy may be longer. In the case of intersection, a copy of the smaller argument set is made, omitting any of its members that are not in the larger set. As with union, this strategy is designed to minimize the number of lookups. For the difference of two sets, a copy of the first argument set is made, adding only elements that are not in the second argument. This involves looking up all members in the first argument set in the second argument set.

7.2 Tables
7.2.1 Data Organization for Tables

The implementation of tables is similar to the implementation of sets, with a header block containing slots for elements ordered by hash numbers. A table-header block contains an extra descriptor for the default assigned value. An empty table with the default assigned value 0 is produced by
t := table(0)

The structure of the table-header block is

Table lookup is more complicated than set lookup, since table elements contain both an entry value and an assigned value. Furthermore, table elements can be referenced by variables. A new table element is created as a byproduct of assignment to a table reference with an entry value that is not in the table.

"1

The result of evaluating an assignment expression such as


t[39] := 1

illustrates the structure of a table-element block:

In the case of a table reference such as t[x], the hash number for the entry value x is used to select a slot, and the corresponding list is searched for a table-element block that contains the same entry value. As in the case of sets, comparison is first made using hash numbers; values are compared only if their hash numbers are the same. If a table-element block with a matching entry value is found, a variable that points to the corresponding assigned value is produced. For example, if 39 is in t as illustrated previously, t[39] produces

If this variable is dereferenced, as in


write(t[39])

the value 1 is written. On the other hand, if an assignment is made to this variable, as in
t[39] +:= 1

the assigned value in the table-element block is changed:

If a table element with a matching entry value is not found, the situation is very similar to that in a subscripted string: the operation to be performed depends on whether the table reference is used in a dereferencing or assignment context. In a dereferencing context, the

"2

default value for the table is produced, while in an assignment context, a new element is added to the table. The approach taken is similar to that for subscripted strings: a trapped variable is created. As with substring trapped variables, table-element trapped variables contain the information that is necessary to carry out the required computation for either dereferencing or assignment. Suppose, for example, that the entry value 36 is not in the table t. Then t[36] produces the following result:

Note that the size of a table-element trapped-variable block is the same as the size of a table-element block. The last descriptor in the table-element trapped-variable block is reserved for subsequent use, as described below. If this trapped variable is dereferenced, as in
write(t[36])

the default assigned value, 0, which is in the table-header block for t, is produced. Unfortunately, the situation is not always this simple. It is possible for elements to be inserted in a table between the time the table-element trapped-variable block is created and the time it is dereferenced. An example is
write(t[36] , t[36] := 2)

Since functions do not dereference their arguments until all the arguments have been evaluated, the result of dereferencing the first argument of write should be 2, not 0. In order to handle such cases, when a table-element trapped variable is dereferenced, its linked list in the table must be searched again to determine whether to return the assigned value of a newly inserted element or to return the default value. If an assignment is made to the table reference, as in
t[36] +:= 1

the table-element trapped-variable block is converted to a table-element block with the assigned value stored in the reserved descriptor of the table-element trapped-variable block. The table-element block is then linked in the appropriate place. Note that the structures of table-element blocks and table-element trapped-variable blocks are the same, allowing this conversion without allocating a new table-element block. It then is necessary to search the linked list for its slot again to determine the place to insert the table-element block. As in the case of dereferencing, elements may have been inserted in the table between the time the table-element trapped variable was created and the time a value is assigned to it. Normally, no matching entry is found, and the table-element trapped-variable block, transformed into a table-element block, is inserted with the

"3

new assigned value. If a matching entry is found, its assigned value is simply changed, and the block is discarded. Note that reference to a value that is not in a table requires only one computation of its hash value, but two lookups are required in the linked list of table-element blocks for its slot.

7.3 Hashing Functions


Ideally, a hash computation should produce a different result for every different value to which it is applied, and the distribution of the remainder on division by the number of slots should be uniform. Even approaching this ideal requires an impractical amount of computation and space. In practice, it is desirable to have a fast computation that produces few collisions. The subject of hash computation has been studied extensively and there is a substantial body of knowledge concerning useful techniques (Knuth 1973, pp. 506-549). For example, it is known that the number of slots should be a prime that is not close to a power of two. This consideration motivated the choices of 37 and 13 for computers with large and small address spaces, respectively. In general, there is a trade-off between faster lookup, on the average, and more storage overhead. In most situations in which hashing techniques are used, all the values for which hash computations are performed are strings. In Icon, however, any kind of value can be the member of a set or the entry value in a table. The hash computation must, therefore, apply to any type of value. The support routine for computing hash numbers is
word hash(dp) struct descrip *dp; { word i; double r; register word j; register char *s; if (Qual(*dp)) { /* * Compute the hash value for the string by summing the * value of all the characters (to a maximum of 1 0) plus * the length. */ i = 0; s = StrLoc(*dp); j = StrLen(*dp); for (j = (j <= 10) ? j : 10 ; j > 0; j--) i += *s++ & 0377; i += StrLen(*dp) & 0377; } else { switch (Type(*dp)) { /* * The hash value for numeric types is the bit-string * representation of the value. */ case T_Integer: i = IntVal(*dp); break; case T_Long: i = BlkLoc(*dp)->longint.intval;

"4

break; case <,.eal: =et.eal(dp, r)- i ! rbreakcase <,>set: 2/ / >ompute t*e *as* value for a cset b+ performin6 t*e / exclusive7or of t*e )ords in t*e bit arra+' /2 i ! 0for (? ! 0- ? : >set@i9e- ?00) i A! 3lk4oc(/dp)78cset'bits[?]breakdefault: 2/ / Bor ot*er t+pes, use t*e t+pe code as t*e *as* / value' /2 i ! <+pe(/dp)break% % return i%

To hash a string, its characters are added together as integers. At most ten characters are used, since strings can be very long and adding many characters does not improve the hashing sufficiently to justify the time spent in the computation. The maximum of ten is, however, ad hoc. To provide a measure of discrimination between strings with the same initial substring, the length of the string is added to the sum of the characters. This technique for hashing strings is not sophisticated, and others that produce better hashing results are known. However, the computation is simple, easy to write in C, and works well in practice. For a numeric type, the hash value is simply the number. In the case of a cset, the words containing the bits for the cset are combined using the exclusive-or operation. The remaining data types pose an interesting problem. Hash computation must be based on attributes of a value that are invariant with time. Some types, such as files, have such attributes. On the other hand, there is no time-invariant attribute that distinguishes one list from another. The size of a list may change, the elements in it may change, and even its location in memory may change as the result of garbage collection. For a list, its only time-invariant attribute is its type. This presents a dilemma—the type of such a value can be used as its hash number, but if that is done, all values of that type are in the same slot and have the same hash number. Lookup for these values degenerates to a linear search. The alternative is to add some time-invariant attribute, such as a serial number, to these values. This would increase the size of every such value, however. Hash computation in Icon resolves this problem in favor of simplicity. Lists and similar values are hashed according to their type codes. Part of the rationale for this choice is that it is uncommon, in practice, to have sets of lists, tables of sets, and so forth. On balance, it probably is not worth adding the space
overhead for every such value just to improve the performance of only a few programs.

"5

RETROSPECTIVE: Few programming languages support sets or tables; fewer support them with Icon's generality. The implementation of sets and tables provides a clear focus on the generality of descriptors and the uniformity with which different kinds of data are treated in Icon. Since sets and tables may be very large, efficient lookup is an important concern. The hashing and chaining technique used is only one of many possibilities. However, there must be a mechanism for determining the equivalence of values independent of the structure in which they are stored. The fact that elements in tables are accessed by subscripting expressions introduces several complexities. In particular, the fact that the contents of the table that is subscripted may change between the time the subscripting expression is evaluated and the time it is dereferenced or assigned to introduces the necessity of two lookups for every table reference. Hashing a variety of different types of data raises interesting issues. The hashing techniques used by Icon are not sophisticated and there is considerable room for improvement. The trade-offs involved are difficult to evaluate, however.

EXERCISES 7.1 Contrast sets and csets with respect to their implementation, their usefulness in programming, and the efficiency of operations on them.

7.2 Give an example of a situation in which the heterogeneity of sets is useful in programming. 7.3 How much space does an empty set occupy?

7.4 Diagram the structures resulting from the evaluation of the following expressions:
t := table()
t[t] := t

7.5 There are many sophisticated data structures that are designed to ensure efficient lookup in data aggregates like sets and tables (Gonnet 1984). Consider the importance of speed of lookup in sets and tables in Icon and the advantages that these more sophisticated data structures might supply. 7.6 Some of the more sophisticated data structures mentioned in the preceding exercise have been tried experimentally in Icon and either have introduced unexpected implementation problems or have not provided a significant improvement in performance. What are possible reasons for these disappointing results? 7.7 Icon goes to a lot of trouble to avoid adding table-element blocks to a table unless an assignment is made to them. Suppose a table-element block were simply added when a reference was made to an entry value that is not in the table.

How would this simplify the implementation? What positive and negative consequences could this change have on the running speed and space required during program execution? Give examples of types of programs for which the change would have positive and negative effects on performance, respectively.

"

Would this change be transparent to the Icon programmer, not counting possible time and space differences?

7.8 There is space in a table-element trapped-variable block to put the default value for the table. Why is this not done? 7.9 What is the consequence of evaluating the following expressions?
t := table(0) t[37] := 2 write(t[37], t := table(1))

What would happen if the last line given previously were


write(t[37],t := list(100,3))

or
write(t[37], t := "hello")

7.10 Give examples of different strings that have the same hash numbers. 7.11 Design a method for hashing strings that produces a better distribution than the current one. 7.12 What attribute of a table is time-invariant? 7.13 What kinds of symptoms might result from a hashing computation based on an attribute of a value that is not time-invariant?

"!

""

Chapter 8: The Interpreter


PERSPECTIVE: The interpreter provides a software realization of Icon's virtual machine. This machine is stack-based. The basic units on which the Icon virtual machine operates are descriptors. The instructions for the virtual machine consist of operations that manipulate the stack, call C functions that carry out the built-in operations of Icon, and manage the flow of control. The Icon interpreter executes these virtual machine instructions. It consists of a loop in which a virtual machine instruction is fetched and control is transferred to a section of code to perform the corresponding operation.

8.1 Stack-Based Evaluation


Virtual machine instructions typically push and pop data on the interpreter stack. The interpreter stack, which is distinct from the stack used for calls of C functions, is an array of words. The variable sp points to the last word pushed on the interpreter stack. Pushing increments sp, while popping decrements it. When the interpreter executes code that corresponds to a built-in operation in Icon, it pushes descriptors for the arguments on the interpreter stack and calls a C function corresponding to that operation with a pointer to the place on the interpreter stack where the arguments begin. A null descriptor is pushed first to serve as a "zeroth" argument (Arg0) that receives, by convention, the result of the computation and becomes the top descriptor on the stack when the C function returns. On a more conventional virtual machine, the result of the computation would be pushed on the stack, instead of being returned in an argument. The latter method is more convenient in Icon. To illustrate this basic mechanism, consider the expression
?10

which produces a randomly selected integer between 1 and 10, inclusive. The corresponding virtual machine instructions are
pnull        # push null descriptor for the result
int 10       # push descriptor for the integer 10
random       # compute random value

The instructions pnull and int operate directly on the stack. The instruction random calls a C function that computes random values. The pnull instruction pushes a null descriptor:

"#

The int instruction pushes a descriptor for the integer 10:

Suppose that the C function for random computes 3. It replaces the null value of Arg0 by a descriptor for the integer 3. When it returns, sp is set to point to Arg0 and the situation is

8.2 Virtual Machine Instructions


The various aspects of expressions that appear in Icon source-language programs are reflected, directly or indirectly, in the instruction set for the Icon virtual machine. References to constants (literals) and identifiers have direct correspondences in the instruction set of the virtual machine. There is a virtual machine instruction for each source-language operator. This is possible, since the meaning of an operation is fixed and cannot be changed during program execution. The meaning of a function call, however, cannot be determined until it is evaluated, and there is a single virtual machine instruction for function invocation. The invocation of functions is described in detail in Chapter 10. There are several virtual machine instructions related to control structures and the unique aspects of expression evaluation in Icon. These are discussed in the next two chapters. A complete list of virtual machine instructions is given in Appendix B.

8.2.1 Constants

Four kinds of data can be represented literally in Icon programs: integers, strings, csets, and real numbers. The four corresponding virtual machine instructions are
int n        # integer n
str n, a     # string of length n at address a
cset a       # cset block at address a
real a       # real block at address a

The values of integer literals appear as arguments of int instructions. In the case of strings, the two arguments give its length and the address of its first character. The string itself is constructed by the linker and is loaded into memory from the icode file. For csets and real numbers, the linker constructs blocks, which are also loaded from the icode file. These blocks are identical in format to blocks that are constructed during program execution.

#&

The virtual machine instructions str, cset, and real push appropriate descriptors to reference the data as it appears in the icode. For example, the virtual machine instructions for
?"aeiou"

are
pnull
str 5, a
random

where a is the address of the string "aeiou". The pnull instruction pushes a null descriptor as in the previous example:

The str instruction constructs a descriptor for the string "aeiou":

If random produces the string "o", this string replaces the null descriptor and the stack becomes

8.2.2 Identifiers From the viewpoint of the interpreter, there are four kinds of identifiers: global identifiers, static identifiers, local identifiers, and arguments. The values of global and static identifiers are in arrays of descriptors at fixed locations in memory. The values of local identifiers and arguments, on the other hand, are kept on the stack as part of the information associated with a procedure call. The values of the arguments in the call of a procedure are pushed on the stack as the result of the evaluation of expressions prior to the invocation of the procedure. The initial null values for local identifiers are pushed on the stack when the procedure is called.

#1

The portion of the stack between the arguments and local identifiers is fixed in size and contains information that is saved when a procedure is called. This information is described in Chapter 10. There are four virtual machine instructions for constructing variable descriptors:
global n
static n
arg n
local n

Identifiers of each kind are numbered starting at zero. Consequently,


arg 0

pushes a variable descriptor for the first argument. In each case, the descriptor that is pushed on the stack is a variable that points to the descriptor for the value of the corresponding identifier. Consider the expression
j := 1

The corresponding virtual machine instructions are


pnull           # push null descriptor for the result
local 2         # push variable descriptor for j
int 1           # push descriptor for the integer 1
asgn            # perform assignment

When these instructions are interpreted, the succession of stack states is

The Stack after pnull

#2

The Stack after local 2

#3

The Stack after int 1

The Stack after asgn

#4

Note that asgn assigns the value of its second argument to j and overwrites Arg0 with a variable descriptor, which is left on the top of the stack. Similarly, the virtual machine instructions for
z := x

are
pnull
local 0
arg 0
asgn

the states of the stack are

The Stack after pnull

#5

The Stack after local 0

The Stack after arg 0

The Stack after asgn

#!

8.2.3 Operators
There is a virtual machine instruction for each of the forty-two operators in Icon. The instructions random and asgn described previously are examples. Casting Icon operators as virtual machine instructions masks a considerable amount of complexity, since few Icon operators are simple. For example, although x + y appears to be a straightforward computation, it involves checking the types of x and y, converting them to numeric types if they are not already numeric, and terminating with an error message if this is not possible. If x and y are numeric or convertible to numeric, addition is performed. Even this is not simple, since the addition may be integer or floating-point, depending on the types of the arguments. For example, if x is an integer and y is a real number, the integer is converted to a real number. None of these computations is evident in the virtual machine instructions produced for this expression, which are
pnull
local x
local y
plus

In the instructions given previously, the indices that are used to access identifiers have been replaced by the names of the identifiers, which are assumed to be local. This convention is followed in subsequent virtual machine instructions for ease of reading. Augmented assignment operations do not have separate virtual machine instructions. Instead, the instruction dup first pushes a null descriptor and then pushes a duplicate of the descriptor that was previously on top of the stack. For example, the virtual machine instructions for
i +:= 1

are
pnull
local i
dup
int 1
plus
asgn

The stack after the execution of local is

The execution of dup produces

#"

The dup instruction simply takes the place of the pnull and second local instructions in the virtual machine instructions for
i := i + 1

which are
pnull
local i
pnull
local i
int 1
plus
asgn

In this case, only a single local instruction is avoided. If the variable to which the assignment is made is not just an identifier but, instead, a more complicated construction, as in
a[j] +:= 1

substantial computation may be saved by duplicating the result of the first argument expression instead of recomputing it. 8.2.4 Functions While the meaning of an operation is fixed and can be translated into a specific virtual machine instruction, the meaning of a function call can change during program execution. The value of the function also can be computed, as in
(p[i])(x, y)

The general form of a call is


expr0(expr1, expr2, ..., exprn)

The corresponding virtual machine instructions are


code for expr0
code for expr1
code for expr2
...
code for exprn
invoke n

The invoke instruction is relatively complicated, since the value of expr0 may be a procedure, an integer (for mutual evaluation), or even a value that is erroneous. Function invocation is discussed in detail in Chapter 10.

##

8.3 The Interpreter Proper


8.3.1 The Interpreter Loop The interpreter, which is called interp, is basically simple in structure. It maintains a location in the icode (ipc) and begins by fetching the instruction pointed to by ipc and incrementing ipc to the next location. It then branches to a section of code for processing the virtual machine instruction that it fetched. The interpreter loop is
for (;;) {
   op = GetWord;
   switch (op) {
      ...
      case Op_Asgn:
         ...
      case Op_Plus:
         ...
      }
   continue;
   }

where GetWord is a macro that is defined to be (*ipc++). Macros are used extensively in the interpreter to avoid repetitious coding and to make the interpreter easier to read. The coding is illustrated by the case clause for the instruction plus:
case Op_Plus:           /* e1 + e2 */
   Setup_Op(2);
   DerefArg(1);
   DerefArg(2);
   Call_Op;
   break;

Setup_Op(n) sets up a pointer to the address of Arg0 on the interpreter stack. The resulting code is

rargp = (struct descrip *)(sp -1) -n;

The value of n is the number of arguments on the stack. DerefArg(n) dereferences argument n. If it is a variable, it is replaced by its value. Thus, dereferencing is done in place by changing descriptors on the interpreter stack. Call_Op calls the appropriate C function with a pointer to the interpreter stack as provided by Setup_Op(n). The function itself is obtained by looking up op in an array of pointers to functions. The code produced by Call_Op is
(*(optab[op]))(rargp);
sp = (word *)rargp + 1;

1&&

Chapter 9: Expression Evaluation


PERSPECTIVE: The preceding chapter presents the essentials of the interpreter and expression evaluation as it might take place in a conventional programming language in which every expression produces exactly one result. For example, expressions such as
i := j
k := i + j
i +:= ?k

each produce a single result: they can neither fail nor can they produce sequences of results. The one feature of Icon that distinguishes it most clearly from other programming languages is the capacity of its expression-evaluation mechanism to produce no result at all or to produce more than one result. From this capability come unconventional methods of controlling program flow, novel control structures, and goal-directed evaluation. The generality of this expression-evaluation mechanism alone sets Icon apart from other programming languages. While generators, in one form or another, exist in a number of programming languages, such as IPL-V (Newell 1961), CLU (Liskov 1981), Alphard (Shaw 1981), and SETL (Dewar, Schonberg, and Schwartz 1981), such generators are limited to specific constructs, designated contexts, or restricted types of data. Languages with pattern-matching facilities, such as SNOBOL4 (Griswold, Poage, and Polonsky 1971), InterLisp (Teitelman 1974), and Prolog (Clocksin and Mellish 1981), generate alternative matches, but only within pattern matching. Just as Icon's expression-evaluation mechanism distinguishes it from other programming languages, it is also one of the most interesting and challenging aspects of Icon's implementation. Its applicability in every context and to all kinds of data has a pervasive effect on the implementation.

9.1 Bounded Expressions


A clear understanding of the semantics of expression evaluation in Icon is necessary to understand the implementation. One of the most important concepts of expression evaluation in Icon is that of a bounded expression, within which backtracking can take place. However, once a bounded expression has produced a result, it cannot be resumed for another result. For example, in
write(i = find(s1,s2))

find may produce a result and may be resumed to produce another result if the comparison fails. On the other hand, in
write(i = find(s1, s2))
write(j = find(s1, s3))

the two lines constitute separate expressions. Once the evaluation of the expression on the first line is complete, it cannot be resumed. Likewise, the evaluation of the expression on the second line is not affected by whether the expression on the first line succeeds or fails. However, if the two lines are joined by a conjunction operation, as in
write(i = find(s1, s2)) &
write(i = find(s1, s3))

1&1

they are combined into a larger single expression and the expression on the second line is not evaluated if the expression on the first line fails. Similarly, if the expression on the first line succeeds, but the expression on the second line fails, the expression on the first line is resumed. The reason for the difference in the two cases is obscured by the fact that the Icon translator automatically inserts a semicolon at the end of a line on which an expression is complete and for which a new expression begins on the next line. Consequently, the first example is equivalent to
write(i = find(s1, s2));
write(i = find(s1, s3))

The difference between the semicolon and the conjunction operator is substantial. A semicolon bounds an expression, while an operator binds its operands into a single expression. Bounded expressions are enclosed in ovals in the following examples to make the extent of backtracking clear. A compound expression, for example, has the following bounded expressions:
{expr1; expr2; ...; exprn}

Note that exprn is not, of itself, a bounded expression. However, it may be part of a larger bounded expression, as in
({expr1: expr2; ...; exprn}=)

Here exprn is part of the bounded expression for the comparison operator. The entire enclosing bounded expression is a consequence of the final semicolon. In the absence of the context provided by this semicolon, the entire expression might be part of a larger enclosing bounded expression, and so on. The separation of a procedure body into a number of bounded expressions, separated by semicolons (explicit or implicit) and other syntactic constructions, is very important. Otherwise, a procedure body would consist of a single expression, and failure of any component would propagate throughout the entire procedure body. Instead, control backtracking is limited in scope to a bounded expression, as is the lifetime (and hence stack space) for temporary computations. Bounded expressions are particularly important in control structures. For example, in the if-then-else control structure, the control expression is bounded but the other expressions are not:
if expr1 then expr2 else expr3

As with the compound expression illustrated earlier, expr2 or expr3 (whichever is selected) may be the part of a larger bounded expression. An example is
write( if i < j then i to j else j to i )

If the control expression were not a separate bounded expression, the failure of expr2 or expr3 would result in backtracking into it and the if-then-else expression would be equivalent to
(expr1 & expr2) | expr3

which is hardly what is meant by if-then-else. In a while-do loop, the control expression and the expression in the do clause are both bounded:

1&2

while expr1 do expr2

The two bounded expressions ensure that the expressions are evaluated independently of each other and any surrounding context. For example, if expr2 fails, there is no control backtracking into expr1. 9.1.1 Expression Frames In the implementation of Icon, the scope of backtracking is delineated by expression frames. The virtual machine instruction
mark L1

starts an expression frame. If the subsequent expression fails, ipc is set to the location in the icode that corresponds to L1. The value of ipc for a label is relative to the location of the icode that is read in from the icode file. For simplicity in the description that follows, the value of ipc is referred to just by the name of the corresponding label. The mark instruction pushes an expression frame marker onto the stack and sets the expression frame pointer, efp, to it. Thus, efp indicates the beginning of the current expression frame. There is also a generator frame pointer, gfp, which points to another kind of frame that is used to retain information when an expression suspends with a result and is capable of being resumed for another. Generator frames are described in Sec. 9.3. The mark instruction sets gfp to zero, indicating that there is no suspended generator in a new expression frame. An expression frame marker consists of four words: the value ipc for the argument of mark (called the failure ipc), the previous efp, the previous gfp, and ilevel, which is related to suspended generators:

An expression frame marker is declared as a C structure:


struct ef_marker {              /* expression frame marker */
   word *ef_failure;            /* failure ipc */
   struct ef_marker *ef_efp;    /* efp */
   struct gf_marker *ef_gfp;    /* gfp */
   word ef_ilevel;              /* ilevel */
   };

This structure is overlaid on the interpreter stack in order to reference its components. The code for the mark instruction is
case Op_Mark:           /* create expression frame marker */
   newefp = (struct ef_marker *)(sp + 1);
   opnd = GetWord;
   opnd += (word)ipc;
   newefp->ef_failure = (word *)opnd;
   newefp->ef_gfp = gfp;
   newefp->ef_efp = efp;
   newefp->ef_ilevel = ilevel;

1&3

   sp += Wsizeof(*efp);
   efp = newefp;
   gfp = 0;
   break;

The macro Wsizeof(x) produces the size of x in words. An expression frame is removed by the virtual machine instruction
unmark

which restores the previous efp and gfp from the current expression frame marker and removes the current expression frame by setting sp to the word just above the frame marker. The use of mark and unmark is illustrated by
if expr1 then expr2 else expr3

for which the virtual machine instructions are


        mark L1
        code for expr1
        unmark
        code for expr2
        goto L2
L1:
        code for expr3
L2:

The mark instruction creates an expression frame for the evaluation of expr1. If expr1 produces a result, the unmark instruction is evaluated, removing the expression frame for expr1, along with the result produced by expr1. Evaluation then proceeds in expr2. If expr1 fails, control is transferred to the location in the icode corresponding to L1 and the unmark instruction is not executed. In the absence of generators, failure also removes the current expression frame, as described in Sec. 9.2. It is necessary to save the previous value of efp in a new expression marker, since expression frames may be nested. This occurs in interesting ways in some generative control structures, which are discussed in Sec. 9.4. Nested expression frames also occur as a result of evaluating compound expressions, such as
while expr1 do if expr2 then expr2

9.2 Failure
The interesting aspects of implementing expression evaluation in Icon can be divided into two cases: without generators and with generators. The possibility of failure in the absence of generators is itself of interest, since it occurs in other programming languages, such as SNOBOL4. This section describes the handling of failure and assumes, for the moment, that there are no generators. The next section describes generators. In the absence of generators, if failure occurs anywhere in an expression, the entire expression fails without any further evaluation. For example, in the expressions
i := numeric(s)
line := read(f)

if numeric(s) fails in the first line, the assignment is not performed and evaluation continues immediately with the second line. In the implementation, this amounts to removing the

1&4

current expression frame in which failure occurs and continuing with ipc set to the failure ipc from its expression frame marker. The virtual machine instructions for the previous example are
        mark L1
        pnull
        local i
        global numeric
        local s
        invoke 1
        asgn
        unmark
L1:
        mark L2
        pnull
        local line
        global read
        local f
        invoke 1
        asgn
        unmark
L2:

Prior to the evaluation of the expression on the first line, there is some expression frame on the stack:

The instr ction


mark L1

starts a new expression frame. The execution of subsequent virtual machine instructions pushes additional descriptors. The state of the stack when numeric is called by the invoke instruction is

1&5

If numeric fails, efp and sp are reset, so that the stack is in the same state as it was prior to the evaluation of the expression on the first line:

Control is transferred to the location in the icode corresponding to L1, and the execution of
mark L2

starts a new expression frame by pushing a new expression frame marker onto the stack. It is worth noting that failure causes only the current expression frame to be removed and changes ipc to the failure ipc. Any remaining virtual machine instructions in the current expression frame are bypassed; failure is simple and quick.

1&

Failure can occur at three levels: directly from the virtual machine instruction efail, from a C function that implements an operator or function (as in the previous example), or from an Icon procedure. When a conditional operator or function returns, it signals the interpreter, indicating whether it is producing a result or failing by using one of the two forms of return, Return or Fail. These macros simply produce return statements with different returned values. The code in the interpreter for a conditional operation is illustrated by
case Op_Numlt:          /* e1 < e2 */
   Setup_Op(2);
   DerefArg(1);
   DerefArg(2);
   Call_Cond;
   break;

The macro Call_Cond is similar to Call_Op described in Sec. 8.3.1, but it tests the signal returned by the C function. If the signal corresponds to the production of a result, the break is executed and control is transferred to the beginning of the interpreter loop to fetch the next virtual machine instruction. On the other hand, if the signal corresponds to failure, control is transferred to the place in the interpreter that handles failure, efail. An Icon procedure can fail in three ways: by evaluating the expression fail, by the failure of the argument of a return expression, or by flowing off the end of the procedure body. The virtual machine instructions generated for the three cases are similar. For example, the virtual machine instructions for
if i < j then fail else write(j)

are
        mark L1
        pnull
        local i
        local j
        numlt
        unmark
        pfail
L1:
        global write
        local j
        invoke 1

The virtual machine instruction pfail first returns from the current procedure call (see Sec. 10.3), and then transfers to efail.

9.3 Generators and Goal-Directed Evaluation


The capability of an expression not to produce a result is useful for controlling program flow and for bypassing unneeded computation, but generators add the real power and expressiveness to the expression-evaluation semantics of Icon. It should be no surprise that generators also present difficult implementation problems. There are several kinds of generators, including those for control structures, functions and operators, and procedures. While the implementation of the different kinds of generators varies in detail, the same principles apply to all of them.

1&!

As far as using a result of an expression in further computation is concerned, there is no difference between an expression that simply produces a result and an expression that produces a result and is capable of being resumed to produce another one. For example, in
i := numeric("2")
j := upto('aeiou', "Hello world")

the two assignment operations are carried out in the same way, even though upto is a generator and numeric is not. Since such contexts cannot be determined, in general, prior to the time the expressions are evaluated, the implementation is designed so that the interpreter stack is the same, as far as enclosing expressions are concerned, whether an expression returns or suspends. For the previous example, the arguments to the assignment operation are in the same relative place in both cases. On the other hand, if a generator that has suspended is resumed, it must be capable of continuing its computation and possibly producing another result. For this to be possible, both the generator's state and the state of the interpreter stack must be preserved. For example, in
j := (i < upto('aeiou', "Hello world"))

when the function upto suspends, both i and the result produced by upto must be on the stack as arguments of the comparison operation. However, if the comparison operation fails and upto is resumed, the arguments of upto must be on the stack as they were when upto suspended. To satisfy these requirements, when upto suspends, a portion of the stack prior to the arguments for upto is copied to the top of the stack and the result produced by upto is placed on the top of the stack. Thus, the portion of the stack required for the resumption of upto is preserved and the arguments for the comparison are in the proper place. Generator Frames. When an expression suspends, the state of the interpreter stack is preserved by creating a generator frame on the interpreter stack that contains a copy of the portion of the interpreter stack that is needed if the generator is resumed. A generator frame begins with a generator frame marker that contains information about the interpreter state that must be restored if the corresponding generator is resumed. There are three kinds of generator frames that are distinguished by different codes:
G_Csusp    suspension from a C function
G_Esusp    suspension from an alternation expression
G_Psusp    suspension from a procedure

For the first two types of generators, the information saved in the generator frame marker includes the code for the type of the generator, the i-state variables efp, gfp, ipc, and the source-program line number at the time the generator frame is created:

1&"

The corresponding C structure is


struct gf_marker {              /* generator frame marker */
   word gf_gentype;             /* type */
   struct ef_marker *gf_efp;    /* efp */
   struct gf_marker *gf_gfp;    /* gfp */
   word *gf_ipc;                /* ipc */
   word gf_line;                /* line number */
   };

Generators for procedure suspension contain, in addition, the i-state variable related to procedures. See Sec. 10.3.3. As an example, consider the expression
write(i = (1 to 3));

The virtual machine instructions for this expression are


        mark L1
        global write
        pnull
        local i
        int 1
        int 3
        push1            # default increment
        toby
        numeq
        invoke 1
        unmark
L1:

The state of the stack after execution of the first seven instructions is

1&#

The code in the interpreter for calling a generative operator with n arguments is
rargp = (struct descrip *)(sp - 1) - n;
signal = (*(optab[op]))(rargp);
goto C_rtn_term;

Note that rargp points to Arg0 and is the argument of the call to the C function for the operator. The C function for toby is
OpDcl(toby, 3, "toby")
   {
   long from, to, by;

   /*
    * Arg1 (from), Arg2 (to), and Arg3 (by) must be integers.
    *  Also, Arg3 must not be zero.
    */
   if (cvint(&Arg1, &from) == CvtFail)
      runerr(101, &Arg1);
   if (cvint(&Arg2, &to) == CvtFail)
      runerr(101, &Arg2);
   if (cvint(&Arg3, &by) == CvtFail)
      runerr(101, &Arg3);
   if (by == 0)
      runerr(211, &Arg3);

   /*
    * Count up or down (depending on relationship of from
    *  and to) and suspend each value in sequence, failing
    *  when the limit has been exceeded.
    */
   if (by > 0)

11&

      for ( ; from <= to; from += by) {
         Mkint(from, &Arg0);    /* make an integer descriptor */
         Suspend;
         }
   else
      for ( ; from >= to; from += by) {
         Mkint(from, &Arg0);
         Suspend;
         }
   Fail;
   }

The OpDcl macro, which is similar to FncDcl, produces


toby(cargp)
register struct descrip *cargp;

so that toby is called with a pointer to Arg0. The macros Arg0, Arg1, and so forth are defined as
#define Arg0 (cargp[0])
#define Arg1 (cargp[1])

When toby is called, it replaces its Arg0 descriptor by a descriptor for the integer 1 and suspends by using the Suspend macro rather than Return. The Suspend macro calls interp instead of returning to it. This leaves the call of toby intact with its variables preserved and also transfers control to interp so that the next virtual machine instruction can be interpreted. However, it is necessary to push a generator marker on the interpreter stack and copy a portion of the interpreter stack, so that interpretation can continue without changing the portion of the interpreter stack that toby needs in case it is resumed. This is accomplished by calling interp with arguments that signal it to build a generator frame. The definition of Suspend is
#define Suspend {\
   int rc; \
   if ((rc = interp(G_Csusp, cargp)) != A_Resumption) \
      return rc; \
   }

The argument G_Csusp in the call of interp indicates that a generator frame for a C function is needed. The argument cargp points to the location on the interpreter stack where Arg0 for the suspending C function is located. This location is the same as rargp in the call of interp that called upto. In this situation, interp puts a generator frame marker on the interpreter stack and copies the portion of the interpreter stack from the last expression or generator frame marker through cargp onto the top of the interpreter stack:

111

The stack is exactly the same, as far as the execution of numeq is concerned, as it would have been if toby had simply returned. However, the arguments of toby (and the preceding arguments of numeq) are still intact, so that toby can be resumed. The generator frame is interposed between the two portions of the interpreter stack. The top of the stack corresponds to the evaluation of
write(i = 1);

Resumption. Suppose the value of i in the previous example is 2. The comparison fails and control is transferred to efail, as it is in the case of all operations that fail. The code for efail is
case Op_Efail:
efail:
   /*
    * Failure has occurred in the current expression frame.
    */
   if (gfp == 0) {

112

      /*
       * There are no suspended generators to resume.
       *  Remove the current expression frame, restoring
       *  values.
       *
       * If the failure ipc is 0, propagate failure to the
       *  enclosing frame by branching back to efail.
       *  This happens, for example, in looping control
       *  structures that fail when complete.
       */
      ipc = efp->ef_failure;
      gfp = efp->ef_gfp;
      sp = (word *)efp - 1;
      efp = efp->ef_efp;
      if (ipc == 0)
         goto efail;
      break;
      }
   else {
      /*
       * There is a generator that can be resumed. Make
       *  the stack adjustments and then switch on the
       *  type of the generator frame marker.
       */
      register struct gf_marker *resgfp = gfp;
      type = resgfp->gf_gentype;
      ipc = resgfp->gf_ipc;
      efp = resgfp->gf_efp;
      line = resgfp->gf_line;
      gfp = resgfp->gf_gfp;
      sp = (word *)resgfp - 1;
      switch (type) {
         case G_Csusp: {
            --ilevel;
            return A_Resumption;
            break;
            }
         case G_Esusp:
            goto efail;
         case G_Psusp:
            break;
         }
      break;
      }

If there were no generator frame (if gfp were 0), the entire expression frame would be removed, and the expression would fail as described in Sec. 9.2. However, since there is a G_Csusp generator frame, the stack is restored to the state it is in when toby suspended, and the values saved in the generator frame marker are restored:

113

All traces of the first execution of numeq have been removed from the stack. As shown by the code for efail, the call to toby is resumed by returning to it from interp with the signal A_Resumption, which indicates another result is needed. When control is returned to toby, it changes its Arg0 descriptor to the integer 2 and suspends again:

114

The interpreter stack is exactly as it was when toby suspended the first time, except that the integer 2 is on the stack in place of the integer 1. The top of the stack corresponds to the evaluation of
write(i = 2);

Since the value of i is 2, numeq succeeds. It copies the value of its second argument to its Arg0 descriptor and returns. The value 2 is written and the unmark instruction is executed, removing the entire expression frame from the stack. Goal-Directed Evaluation. Goal-directed evaluation occurs when an expression fails and there are generator frames on the interpreter stack as the consequence of expressions that have suspended. In the case of an expression such as

115

1 to upto(c, s)

upto suspends first, followed by toby. These generator frames are linked together, with gfp pointing to the one for toby, which in turn contains a pointer to the one for upto. In general, generator frames are linked together with gfp pointing to the one for the most recent suspension. This produces the last-in, first-out (depth-first) order of expression evaluation in Icon. Goal-directed evaluation occurs as a result of resuming a suspended expression when failure occurs in the surrounding expression frame. Removing C Frames. Since C functions that suspend call the interpreter and the interpreter in turn calls C functions, expression evaluation typically results in a sequence of frames for calls on the C stack. When the evaluation of a bounded expression is complete, there may be frames on the C stack for generators, even though these generators no longer can be resumed. In order to "unwind" the C stack in such cases, the i-state variable ilevel is used to keep track of the level of call of interp by C functions. Whenever interp is called, it increments ilevel. When an expression frame is created, the current value of ilevel is saved in it, as illustrated previously. When the expression frame is about to be removed, if the current value of ilevel is greater than the value in the current expression frame, ilevel is decremented and the interpreter returns with a signal to the C function that called it to return rather than to produce another result. If the signal returned by interp is A_Resumption, the C function continues execution, while for any other signal the C function returns. Since C functions return to interp, interp always checks the signal returned to it to determine if it produced a result or if it is unwinding. If it is unwinding, interp returns the unwinding signal instead of continuing evaluation of the current expression. Consider again the expression
write(i = (1 to 3));

for which the virtual machine instructions are


        mark L1
        global write
        pnull
        local i
        int 1
        int 3
        push1            # default increment
        toby
        numeq
        invoke 1
        unmark
L1:

When toby produces a result, it calls interp. When the unmark instruction is executed, the C stack contains a frame for the call to toby and for its call to interp. The code for unmark is
case Op_Unmark:         /* remove expression frame */
   gfp = efp->ef_gfp;
   sp = (word *)efp - 1;
   /*
    * Remove any suspended C generators.
    */

11

Unmark_uw: if (efp->ef_ilevel < ilevel) { --ilevel; return A_Unmark_uw; } efp = efp->ef_efp; break;

Note that in this case Suspend gets the return code A_Unmark_uw and in turn returns A_Unmark_uw to interp. The section of code in interp that checks the signal that is returned from C functions is
C_rtn_term: switch (signal) { case A_Failure: goto efail; case A_Unmark_uw: /* unwind for unmark */ goto Unmark_uw; case A_Lsusp_uw: /* unwind for lsusp */ goto Lsusp_uw; case A_Eret_uw: /* unwind for eret */ goto Eret_uw; case A_Pret_uw: /* unwind for pret */ goto Pret_uw; case A_Pfail_uw: /* unwind for pfail */ goto Pfail_uw; } sp = (word *)rargp + 1; /* set sp to result */ continue; }

Thus, when interp returns to a C function with an unwinding signal, there is a cascade of C returns until ilevel is the same as it was when the current expression frame was created. Note that there are several cases in addition to unmark where unwinding is necessary.

9.4 Generative Control Structures


In addition to functions and operators that may generate more than one result, there are several generative control structures at the level of virtual machine instructions. 9.4.1 Alternation The virtual machine instructions for
expr2 | expr3

are
mark L1 code for expr2 esusp goto L2 L1: code for expr3 L2:

The mark instruction creates an expression frame marker for alternation whose purpose is to preserve the failure ipc for L1 in case the results for expr3 are needed. If expr2 produces a result, esusp creates a generator frame with the usual marker and then copies

11!

the portion of the interpreter stack between the last expression or generator frame marker and the alternation marker to the top of the stack. It then pushes a copy of the result produced by expr2. This connects the result produced by expr2 with the expression prior to the alternation control structure. Next, esusp sets efp to point to the expression frame marker prior to the alternation marker. For example, in the expression
write(i := 1 | 2)

the stack after the execution of esusp is

The top portion of the stack is the same as if expr2 had produced a result in the absence of alternation. However, the generator frame marker pushed by esusp contains a pointer to the alternation marker. If another result from expr2 is needed, the generator frame left by esusp is removed, restoring the stack to its state when expr2 produced a result. If expr2 itself was a generator that suspended, it is resumed. Otherwise, control is transferred to efail and ipc is set to a value corresponding to L1, so that expr3 is evaluated next.

11"

9.4.2 Repeated Alternation Alternation is the general model for generative control structures. Repeated alternation, |expr, is similar to alternation, and would be equivalent to
expr | expr | expr | ...

except for a special termination condition that causes repeated alternation to stop if expr does not produce a result. Without this termination condition, an expression such as
|upto(c, s)

would never return if upto failed—expression evaluation would vanish into a "black hole." Expressions that produce results at one time but not at another also are useful. For example,
|read()

generates the lines from the standard input file. Because of the termination condition, this expression terminates when the end of the input file is reached. If it vanished into a "black hole," it could not be used safely. If it were not for the termination condition, the virtual machine instructions for |expr would be
L1: mark L1 code for expr esusp

The "black hole" here is evident—if expr fails, it is evaluated again and there is no way out. The termination condition is handled by an instruction that changes the failure ipc in the current expression marker. The actual virtual machine instructions for |expr are
L1 : mark0 code for expr chfail L1 esusp

The virtual machine instruction mark0 pushes an expression frame marker with a zero failure ipc. If a zero failure ipc is encountered during failure, as illustrated by the code for efail in Sec. 9.3, failure is transmitted to the enclosing expression. If expr produces a result, however, the chfail instruction is executed. It changes the failure ipc in the current expression marker to correspond to L1, so that if expr does not produce a result when it is resumed, execution starts at the location in the icode corresponding to L1 again, causing another iteration of the alternation loop. It is important to realize that chfail only changes the failure ipc in the current expression marker on the stack. Subsequent execution of mark0 creates a new expression frame whose marker has a zero failure ipc. 9.4.3 Limitation In the limitation control structure,
expr1 \ expr2

the normal left-to-right evaluation of Icon is reversed and expr2 is evaluated first. The virtual machine instructions are
code for expr2 limit

11#

code for expr1 lsusp

If expr2 succeeds, its result is on the top of the stack. The limit instruction checks this result to be sure that it is legal—an integer greater than or equal to zero. If it is not an integer, an attempt is made to convert it to one. If the limit value is zero, limit fails. Otherwise, limit creates an expression frame marker with a zero failure ipc and execution continues, so that expr1 is evaluated in its own expression frame. During the evaluation of expr1, the limit value is directly below its expression marker. For example, in
expr1 \ 10

the stack prior to the evaluation of expr1 is

              10              (limit value)
   efp ->     0               (failure ipc)
              previous efp
              previous gfp
   sp ->      ilevel

If expr1 produces a result, lsusp is executed. The lsusp instruction is very similar to esusp. Before producing a generator frame, however, lsusp decrements the limit value. If it becomes zero, the expression frame for expr1 is removed, the C stack is unwound, and the last value it produced is placed on the stack in place of the limit value. Otherwise, it copies the portion of the interpreter stack between the end of the last expression or generator frame marker and the limit value to the top of the stack. Note that no generator frame is needed.

9.5 Iteration
The difference between evaluation and resumption in a loop is illustrated by the virtual machine instructions for a conventional loop
while expr1 do expr2

and the iteration control str ct re


every expr1 do expr2

The instructions for while-do are


L1: mark0
code for expr1 unmark
mark L1

code for expr2 unmark

goto L1

If expr1 fails, the entire expression fails and failure is transmitted to the enclosing expression frame because the failure ipc is zero. If expr1 produces a result, expr2 is evaluated in a separate expression frame. Whether expr2 produces a result or not, its expression frame is removed and execution continues at the beginning of the loop. The instructions for every-do are
mark0 code for expr1

12&

pop mark0 code for expr2 unmark efail

If expr1 fails, the entire expression fails and failure is transmitted to the enclosing expression frame as in the case of while-do. If expr1 produces a result, it is discarded by the pop instruction, since this result is not used in any subsequent computation. The expression frame for expr1 is not removed, however, and expr2 is evaluated in its own expression frame within the expression frame for expr1 (unlike the case for the while loop). If expr2 produces a result, its expression frame is removed and efail is executed. If expr2 fails, it transmits failure to the enclosing expression frame, which is the expression frame for expr1. If expr2 produces a result, efail causes failure in the expression frame for expr1. Thus, the effect is the same, whether or not expr2 produces a result. All results are produced simply by forcing failure. If the expression frame for expr1 contains a generator frame, which is the case if expr1 suspended, evaluation is resumed accordingly, so that expr1 can produce another result. If expr1 simply produces a result instead of suspending, there is no generator frame, efail removes its expression frame, and failure is transmitted to the enclosing expression frame.

9.6 String Scanning


String scanning is one of the most useful operations in Icon. Its implementation, however, is comparatively simple. There is no special expression-evaluation mechanism associated with string scanning per se; all "pattern matching" follows naturally from goal-directed evaluation. The string-scanning keywords, &subject and &pos, must be handled properly, however. These keywords have global scope with respect to procedure invocation, but they are maintained in a stack-like fashion with respect to string-scanning expressions. The expression
expr1 ? expr2

is a control structure, not an operation, since, by definition, the arguments for an operation are evaluated before the operation is performed. This form of evaluation does not work for string scanning, since after expr1 is evaluated, but before expr2 is evaluated, the previous values of &subject and &pos must be saved and new ones established. Furthermore, when string scanning is finished, the old values of &subject and &pos must be restored. In addition, if string scanning succeeds, the values of these keywords at the time string scanning produces a result must be saved so that they can be restored if the string-scanning operation is resumed to produce another result. The virtual machine instructions for
expr1 ? expr2

are
code for expr1 bscan code for expr2 escan

If expr1 succeeds, it leaves a result on the top of the stack. The bscan instruction assures that this value is a string, performing a conversion if necessary. Otherwise, the old values

121

of &subject and &pos are pushed on the stack, the value of &subject is set to the (possibly converted) one produced by expr1, and &pos is set to 1. The bscan instruction then suspends. This is necessary in case expr2 fails, so that bscan can get control again to perform data backtracking, restoring the previous values of &subject and &pos from the stack where they were saved. If expr2 succeeds, the escan instruction copies the descriptor on the top of the stack to its Arg0 position, overwriting the result produced by expr2. It then exchanges the current values of &subject and &pos with those saved by bscan, thus restoring the values of these keywords to their values prior to the scanning expression and at the same time saving the values they had at the time expr2 produced a result. The escan instruction then suspends. If escan is resumed, the values of &subject and &pos are restored from the stack, restoring the situation to what it was when expr2 produced a result. The escan instruction then fails in order to force the resumption of any suspended generators left by expr2. Suppose, for example, that the values of &subject and &pos are "the" and 2 respectively, when the following expression is evaluated:
read(f) ? move(4)

Suppose read(f) produces the string "coconuts". The stack is

&subject: "the" &pos: 2 The bscan instruction is executed, pushing the current values of &subject and &pos:

&subject: "the" &pos: 2 The bscan instruction sets &subject to "coconuts" and &pos to 1. The bscan instruction then suspends and move(4) is evaluated. It suspends, and the top of the stack is

122

&subject: "coconuts" &pos: 5 The escan instruction is executed next. It copies the descriptor on the top of the stack to replace the result produced by expr2. It then exchanges the current values of &subject and &pos with those on the stack:

&subject: "the" &pos: 2 The escan instruction then suspends, building a generator frame. The result of expr2 is placed on the top of the stack, becoming the result of the entire scanning expression.

Since escan suspends, the saved values of &subject and &pos are preserved in a generator frame on the stack until escan is resumed or until the current expression frame is removed. RETROSPECTIVE: The implementation of expression evaluation in Icon focuses on the concept of an expression frame within which control backtracking can occur. Bounded expressions, for example, are represented on the stack by expression frames, which confine backtracking.

123

In the absence of generators, failure simply results in the removal of the current expression frame and transfer to a new location in the icode, bypassing instructions that otherwise would have been executed. State information must be saved when a generator suspends, so that its evaluation can be resumed. This information is preserved in a generator frame within the current expression frame. Generator frames are linked together in a last-in, first-out fashion. Goal-directed evaluation is a natural consequence of resuming the most recently suspended generator when an expression fails, instead of simply removing the current expression frame. String scanning involves saving and restoring the values of &subject and &pos. This is somewhat complicated, since scanning expressions can suspend and be resumed, but string scanning itself introduces nothing new into expression evaluation: generators and goal-directed evaluation provide "pattern matching." EXERCISES 9.1 Circle all the bounded expressions in the following segments of code:
while line := read() do if *line = i then write(line) if (i = find(s1,s2)) & (j = find(s1,s3)) then { write(i) write(i) } line ? while write(move(1)) do move(1)

9.2 Describe the effect of nested generators and generators in mutual evaluation on the interpreter level. 9.3 Consider a hypothetical control structure called exclusive alternation that is the same as regular alternation, except that if the first argument produces at least one result, the results from the second argument are not produced. Show the virtual machine instructions that should be generated for exclusive alternation. 9.4 The expression read(f) is an example of an expression that may produce a result at one time and fail at another. This is possible because of a side effect of evaluating it—changing the position in the file f. Give an example of an expression that may fail at one time and produce a result at a subsequent time. 9.5 There are potential "black holes" in the expression-evaluation mechanism of Icon, despite the termination condition for repeated alternation. Give an example of one. 9.6 The expression frame marker produced by limit makes it easy to locate the limitation counter. Show how the counter could be located without the marker. 9.7 Suppose that the virtual machine instructions for
every expr1 do expr2

did not pop the result produced by expr1. What effect would this have? 9.8 The virtual machine instructions for
every expr

are
mark0 code for expr pop

124

efail

so that failure causes expr to be resumed. The keyword &fail also fails, so that the virtual machine instructions for
expr & &fail

are
code for expr efail

It is sometimes claimed that these two expressions are equivalent. If this were so, the shorter virtual machine instruction sequence for the second expression could be used for the first expression. Explain why the two expressions are not equivalent, in general, and give an example in which they are different. 9.9 Diagram the states of the stack for the example given in Sec. 9.6, showing all generator frames. 9.10 Show the successive stack states for the evaluation of the following expressions, assuming that the values of &subject and &pos are "the" and 2 respectively, and that read() produces "coconuts" in each case: (a) read(f) ? move(10) (b) (read(f) ? move(4)) ? move(2) (c) read(f) ? (move(4) ? move(2)) (d) (read(f) ? move(4)) ? move(10) (e) (read(f) ? move(4 | 6)) ? move(5) (f) (read(f) ? move(4)) & (read(f) & move(10)) 9.11 Write Icon procedures to emulate string scanning. Hint: consider the virtual machine instructions for
expr1 ? expr2

125

Chapter 10: Functions, Procedures, and Co-Expressions


PERSPECTIVE: The invocation of functions and procedures is central to the evaluation of expressions in most programming languages. In Icon, this activity has several aspects that complicate its implementation. Functions and procedures are data values that can be assigned to identifiers, passed as arguments, and so on. Consequently, the meaning of an invocation expression cannot be determined until it is evaluated. Functions and procedures can be called with more or fewer arguments than expected. Thus, there must be a provision for adjusting argument lists at run time. Since mutual evaluation has the same syntax as function and procedure invocation, run-time processing of such expressions is further complicated. Co-expressions, which require separate stacks for their evaluation, add complexities and dependencies on computer architecture that are not found elsewhere in the implementation.

10.1 Invocation Expressions


As mentioned in Sec. 8.2.4, the virtual machine code for an expression such as
expr0(expr1, expr2, ..., exprn)

is
code for expr0
code for expr1
code for expr2
...
code for exprn
invoke n

Consequently, the stack when the invoke instruction is executed is

The meaning of the expression, and hence the action taken by invoke, depends on the result produced by expr0. If the value of expr0 is an integer or convertible to an integer, the invocation expression corresponds to mutual evaluation. If this integer is negative, it is converted to the corresponding positive value with respect to the number of arguments. If the value is between one and n, the corresponding descriptor is copied on top of the result of expr0, sp is set to this position, and invoke transfers control to the beginning of the interpretive loop. On the other hand, if the value is out of range, invoke fails. Note that the returned value overwrites the descriptor for expr0, whereas for operators a null-valued descriptor is pushed to receive the value. If the value of expr0 is a function or a procedure, the corresponding function or procedure must be invoked with the appropriate arguments. A function or procedure value is represented by a descriptor that points to a block that contains information about the function or procedure.

12

10.2 Procedure Blocks


Functions and procedures have similar blocks, and there is no source-language type distinction between them. Blocks for Procedures. Blocks for procedures are constructed by the linker, using information provided by the translator. Such blocks are read in as part of the icode file when an Icon program is executed. The block for a procedure contains the usual title and size words, followed by six words that characterize the procedure: (1) The icode location of the first virtual machine instruction for the procedure. (2) The number of arguments expected by the procedure. (3) The number of local identifiers in the procedure. (4) The number of static identifiers in the procedure. (5) The index in the static identifier array of the first static identifier in the procedure. (6) A C string for the name of the file in which the procedure declaration occurred. The remainder of the procedure block contains qualifiers: one for the string name of the procedure, then others for the string names of the arguments, local identifiers, and static identifiers, in that order. For example, the procedure declaration
procedure calc(i,j) local k static base, index end

has the following procedure block:

12!

The 0 value for the index in the static identifier array indicates that base is the first static identifier in the program. The indices of static identifiers are zero-based and increase throughout a program as static declarations occur. Blocks for Functions. Blocks for functions are created by the macro FncDcl that occurs at the beginning of every C function that implements an Icon function. Such blocks for functions are similar to those for procedures but are distinguished by the value -1 in the word that otherwise contains the number of local identifiers. The entry point is the entry point of the C routine for the function. The procedure block for repl is typical: size of block -> C entry point number of arguments function indicator not used not used not used -> "repl" Note that there are no argument names. Some functions, such as write, allow an arbitrary number of arguments. This is indicated by the value -1 in place of the number of arguments:

size of block -> C entry point indicator of a variable number of arguments function indicator not used not used not used -> "write"

10.3 Invocation
10.3.1 Argument Processing Argument processing begins by dereferencing the arguments in place on the stack. If a fixed number of arguments is specified in the procedure block, this number is compared with the argument of invoke, which is the number of arguments on the stack. If there are too many arguments, sp is set to point to the last one expected. For example, the expression
numeric(i, j)

results in

12"

Since numeric expects only one argument, sp is reset, effectively popping the second argument:

On the other hand, if there are not enough arguments, null-valued descriptors are pushed to supply the missing arguments. For example, the expression
left(s, i)

results in

and a null value is pushed to provide the missing third argument:

12#

10.3.2 Function Invocation Function invocation involves calling a C function in a fashion that is very similar to evaluating an operator. In the case of an Icon function, the entry point of the corresponding C function is obtained from the procedure block rather than by indexing an array of function pointers corresponding to operator codes. For an Icon function that has a fixed number of arguments, the C function is called with a single argument that is a pointer to the location of Arg0 on the interpreter stack. Note that Arg0 is the descriptor that points to the procedure block. For an Icon function that may be called with an arbitrary number of arguments, the C function is called with two arguments: the number of arguments on the stack and a pointer to Arg0. Like an operator, a function may fail, return a result, or suspend. The coding protocol is the same as for operators. The function find is an example:
FncDcl(find,4) { register word l; register char *s1, *s2; word i, j, t; long l1, l2; char sbuf1[MaxCvtLen], sbuf2[MaxCvtLen]; /* * Arg1 must be a string. Arg2 defaults to &subject; Arg3 * to &pos if Arg2 is defaulted, or to 1 otherwise; Arg4 to 0. */ if (cvstr(&Arg1, sbuf1) == CvtFail) runerr(103, &Arg1); if (defstr(&Arg2, sbuf2, &k_subject)) defint(&Arg3, &l1, k_pos); else defint(&Arg3, &l1, (word)1); defint(&Arg4, &l2, (word)0); /* * Convert Arg3 and Arg4 to absolute positions in Arg2 and * order them so that i <= j. */ i = cvpos(l1, StrLen(Arg2)); if (i == 0) Fail; j = cvpos(l2, StrLen(Arg2)); if (j == 0) Fail;

13&

if (i > j) { t = i; i = j; j = t; } /* * Loop through Arg2[i:j] trying to find Arg1 at each point, * stopping when the remaining portion Arg2[i:j] is too short * to contain Arg1. */ Arg0.dword = D_Integer; while (i <= j - StrLen(Arg1)) { s1 = StrLoc(Arg1); s2 = StrLoc(Arg2) + i - 1; l = StrLen(Arg1); /* * Compare strings on a byte-wise basis; if the end is reached * before inequality is found, suspend with the position of * the string. */ do { if (l-- <= 0) { IntVal(Arg0) = i; Suspend; break; } } while (*s1++ == *s2++); i++; } Fail; }

10.3.3 Procedure Invocation In the case of procedure invocation, a procedure frame is pushed onto the interpreter stack to preserve information that may be changed during the execution of the procedure and that must be restored when the procedure returns. As for other types of frames, a procedure frame begins with a marker. A procedure frame marker consists of eight words:

The current procedure frame is pointed to by pfp, and argp points to the place on the interpreter stack where the arguments begin, analogous to Arg0 for functions. The number of arguments, which can be computed from pfp and argp, is provided to make computations related to arguments more convenient.

131

After the procedure marker is constructed, a null-valued descriptor is pushed for each local identifier. For example, the call
calc(3,4)

for the procedure declaration given in Sec. 10.2 produces

Once the null values for the local identifiers are pushed, ipc is set to the entry point given in the procedure block and efp and gfp are set to zero. Execution then continues in the interpreter with the new ipc. The three forms of return from a procedure are the same as those from a function and correspond to the source-language expressions
return e fail suspend e

The corresponding virtual machine instructions are pret, pfail, and psusp. For example, the virtual machine code for
return &null

is
pnull pret

In the case of pret, the result currently on the top of the interpreter stack is copied on top of the descriptor pointed to by argp. If this result is a variable that is on the stack (and hence local to the current procedure call), it is dereferenced in place. The C stack is unwound, since there may be suspended generators at the time of the return. The values saved in the procedure frame marker are restored, and execution continues in the interpreter with the restored ipc. In the case of failure, the C stack is unwound as it is for pret, values are restored from the procedure frame marker, and control is transferred to efail.

132

Procedure suspension is similar to other forms of suspension. The descriptor on the top of the interpreter stack is dereferenced, if necessary, and saved. A generator frame marker is constructed on the interpreter stack to preserve values that may be needed if the procedure call is resumed. For procedure suspension, a generator frame marker contains two words in addition to those needed for other kinds of generator frame markers and has the form

After the generator frame marker is pushed, the portion of the stack between the last generator or expression frame marker before the call to this procedure and the word prior to argp is copied to the top of the stack. Finally, the saved descriptor, which is the result produced by the procedure, is pushed on the top of the stack. Execution then continues in the interpreter with the restored ipc.

10.4 Co-Expressions
Co-expressions add another dimension to expression evaluation in Icon. The important thing to understand about co-expressions is that Icon evaluation is always in some co-expression. Although it is not evident, the execution of an Icon program begins in a co-expression, namely the value of &main. A co-expression requires both an interpreter stack and a C stack. In the co-expression for &main, the interpreter stack is statically allocated and the C stack is the one normally used for C execution—the "system stack" on some computers. The creation of a new co-expression produces a new interpreter stack and a new C stack, as well as space that is needed to save state information. When a co-expression is activated, the context for evaluation is changed to the stacks for the activated co-expression. When the activation of a co-expression produces a result, it in turn activates the co-expression that activated it, leaving the stacks from which the return occurred in a state of suspension. Thus, co-expression activation constitutes a simple context switch. In every co-expression, expression evaluation is in some state, possibly actively executing, possibly suspended, or possibly complete and unreachable. The virtual machine instructions for
create expr0

are
goto L3 L1 : pop mark L2

133

code for expr0 coret efail L2: cofail goto L2 L3: create L1

Control goes immediately to L3, where the instruction create constructs a co-expression block and returns a descriptor that points to it. This block contains space for i-state variables, space for the state of the C stack, an interpreter stack, and a C stack. The code between L1 and L3 is not executed until the co-expression is activated. The pop instruction following L1 discards the result transmitted to a co-expression on its first activation, since there is no expression waiting to receive the result of an initial activation. Next, an expression frame marker is created, and the code for expr0 is executed. If expr0 produces a result, coret is executed to return the result to the activating expression. If the co-expression is activated again, its execution continues with efail, which causes any suspended generators in the code for expr0 to be resumed. If expr0 fails, the expression frame is removed and cofail is executed. The cofail instruction is very similar to the coret instruction, except that it signals failure rather than producing a result. Note that if a co-expression that returns by means of cofail is activated again, the cofail instruction is executed in a loop. A co-expression is activated by the expression
expr1 @ expr2

for which the virtual machine code is


code for expr1 code for expr2 coact

The more common form of activation, @expr0, is just an abbreviation for &null @ expr0; a result is always transmitted, even if it is the null value. The virtual machine code for expr1 produces the descriptor for the result that is to be transmitted to the co-expression being activated. The coact instruction dereferences the result produced by expr2, if necessary, and checks to make sure it is a co-expression. After setting up state information, coact transfers control to the new co-expression with ipc set to L1. Execution continues there. If coret is reached, control is restored to the activating co-expression. The instructions coact and coret are very similar. Each saves the current co-expression state, sets up the new co-expression state, and transfers control. Co-Expression Blocks. There is quite a bit of information associated with a co-expression, and space is provided for it in a co-expression block:

134

The interpreter stack and C stack shown in this diagram are not to scale compared with the rest of the block. Both are comparatively large; the actual sizes depend on the address space of the target computer. The first word of the block is the usual title. The next word contains the number of results the co-expression has produced—its "size." Then there is a pointer to the next co-expression block on a list that is maintained for garbage collection purposes. See Sec. 11.3.4. Following this pointer there are i-state variables: pfp, efp, gfp, argp, ipc, sp, the current program line number, and ilevel. Then there is a descriptor for the transmitted result, followed by two more descriptors: one for the co-expression that activates this one and one for a refresh block that is needed if a copy of this co-expression block is needed. C state information is contained in an array of words, cstate, for registers and possibly other state information. The array cstate typically contains fifteen words for such information. The C sp is stored in cstate[0]. The use of the rest of cstate is machine-dependent. Finally, there is an interpreter stack and a C stack. On a computer with a downward-growing C stack, such as the VAX, the base of the C stack is at the end of the co-expression block and the interpreter and C stacks grow toward each other. On a computer with an upward-growing C stack, the C stack base follows the end of the interpreter stack.

135

Stack Initialization. When a co-expression is first activated, its interpreter stack must be in an appropriate state. This initialization is done when the co-expression block is created. A procedure frame, which is a copy of the procedure frame for the procedure in which the create instruction is executed, is placed on the new stack. It consists of the words from argp through the procedure frame marker and the descriptors for the local identifiers. The efp and gfp in the co-expression block are set to zero and the ipc is set to the value given in the argument to the create instruction (L1). No C state is set up on the new C stack; this is handled when the co-expression is activated the first time. The initial null value for the activator indicates the absence of a valid C state. Co-Expression Activation. As mentioned previously, coact and coret perform many similar functions—both save current state information, establish new state information, and activate another co-expression. The current i-state variables are saved in the current co-expression block, and new ones are established from the co-expression block for the co-expression being activated. Similar actions are taken for the C state. Since the C state is machine-dependent, the "context switch" for the C state is performed by a routine, called coswitch, that contains assembly-language code. The C state typically consists of registers that are used to address the C stack and registers that must be preserved across the call of a C function. On the VAX, for example, the C stack registers are sp, ap, and fp.
Only the registers r6 through r11 must be saved for some C compilers, while other C compilers require that r3 through r11 be saved. Once the necessary registers are saved in the cstate array of the current co-expression, new values of these registers are established. If the co-expression being activated has been activated before, the C state is set up from its cstate array, and coswitch returns to interp. At this point, execution continues in the newly activated co-expression. Control is transferred to the beginning of the interpreter loop, and the next instruction (from the ipc for the co-expression) is fetched. However, when a co-expression is activated for the first time, there are no register values to restore, since no C function has yet been called for the new co-expression. This is indicated, as mentioned previously, by a null activator, which is communicated to coswitch by an integer argument. In this case, coswitch sets up registers for the call of a C function and calls interp to start the execution of the new co-expression. Such a call to interp on the first activation of a co-expression corresponds to the call to interp that starts program execution in the co-expression &main for the main procedure. There can never be a return from the call to interp made in coswitch, since program execution can only terminate normally by a return from the main procedure, in &main. The function coswitch is necessarily machine-dependent. The version for the VAX with the Berkeley 4.3bsd C compiler is an example:
/*
 * coswitch - context switch for the C state of co-expressions; version for
 *  the VAX with the Berkeley 4.3bsd C compiler.
 *
 *  old_cs  pointer to the cstate array of the activating co-expression;
 *          the current C state is saved here.
 *  new_cs  pointer to the cstate array of the co-expression being
 *          activated.
 *  first   0 if the co-expression is being activated for the first time,
 *          nonzero otherwise.
 *
 *  On a first activation there is no saved C state to restore, so interp
 *  is called directly; that call is not expected to return.
 */
coswitch(old_cs, new_cs, first)
int *old_cs, *new_cs;
int first;
{
   asm(" movl 4(ap),r0");      /* r0 = old_cs (4 bytes from ap) */
   asm(" movl 8(ap),r1");      /* r1 = new_cs (8 bytes from ap) */

   /* Save the C stack registers and r6-r11 in the old cstate array. */
   asm(" movl sp,0(r0)");
   asm(" movl fp,4(r0)");
   asm(" movl ap,8(r0)");
   asm(" movl r11,16(r0)");
   asm(" movl r10,20(r0)");
   asm(" movl r9,24(r0)");
   asm(" movl r8,28(r0)");
   asm(" movl r7,32(r0)");
   asm(" movl r6,36(r0)");

   if (first == 0) {           /* this is the first activation */
      asm(" movl 0(r1),sp");   /* switch to the new C stack */
      asm(" clrl fp");
      asm(" clrl ap");
      interp(0, 0);
      syserr("interp() returned in coswitch");
      }
   else {
      /* Restore the C state saved in the new cstate array. */
      asm(" movl 0(r1),sp");
      asm(" movl 4(r1),fp");
      asm(" movl 8(r1),ap");
      asm(" movl 16(r1),r11");
      asm(" movl 20(r1),r10");
      asm(" movl 24(r1),r9");
      asm(" movl 28(r1),r8");
      asm(" movl 32(r1),r7");
      asm(" movl 36(r1),r6");
      }
}

The variables old_cs and new_cs are pointers to the cstate arrays for the activating and activated co-expressions, respectively. The value of first is 0 if the co-expression is being activated for the first time. Note that in order to write coswitch it is necessary to know how the first two arguments are accessed in assembly language. For the previous example, old_cs and new_cs are four and eight bytes from the ap register, respectively. Refreshing a Co-Expression. The operation ^expr0 creates a copy of the co-expression produced by expr0 with its state initialized to what it was when it was originally created. The refresh block for expr0 contains the information necessary to reproduce the initial state. The refresh block contains the original ipc for the co-expression, the number of local identifiers for the procedure in which expr0 was created, a copy of the procedure frame marker at the time of creation, and the values of the arguments and local identifiers at the time of creation. Consider, for example,
# labgen(s): assigns to e a co-expression whose expression concatenates
# s, each integer from i to j, and ":".
procedure labgen(s)
   local i, j, e
   i := 1
   j := 100
   e := create (s || (i to j) || ":")
end

For the call labgen("L"), the refresh block for e is

13!

ECTE6*PCCTI'C@ In)ocation e.pressions are more complicated to implement than operators, since the meaning of an in)ocation e.pression is not 0no+n ntil it is e)al ated( *ince f nctions and proced res are so rce-lang age )al es, the information associated +ith them is stored in bloc0s in the same manner as for other types( The C code that implements Icon f nctions is +ritten in the same fashion as the code for operators( Proced res ha)e so rce-lang age analogs of the fail re and s spension mechanisms sed for implementing f nctions and operators( Proced re frames identify the portions of the interpreter stac0 associated +ith the proced res c rrently in)o0ed( 1 co-e.pression allo+s an e.pression to be e)al ated o tside its le.ical site in the program by pro)iding separate stac0s for its e)al ation( The possibility of m ltiple stac0s in )ario s states of e)al ation introd ces technical problems in the implementation, incl ding a machine-dependent conte.t s+itch( 454*CI)4) 01.0 9hat happens if a call of a proced re or f nction contains an e.tra arg ment e.pression, b t the e)al ation of that e.pression fails? 01.2 *ometimes it is sef l to be able to specify a f nction or proced re by means of its string name( Icon s pports 2string in)ocation,2 +hich allo+s the )al e of expr0 in
expr0(exprj, expr2, ..., exprn) .

to be a string. Thus,
"write"(s)

produces the same result as

13"

write(s)

Of course, such a string name is usually computed, as in (read())(s) Describe what is involved in implementing this aspect of invocation. Operators also may be invoked by their string names, as in "+"(i,j) What is needed in the implementation to support this facility? Can a control structure be invoked by a string name? 10.3 If the result returned by a procedure is a variable, it may need to be dereferenced. This is done in the code for pret and psusp. For example, if the result being returned is a local identifier, it must be replaced by its value. What other kinds of variables must be dereferenced? Is there any difference in the dereferencing done by pret and psusp? 10.4 How is the structure of a co-expression block different on a computer with an upward-growing C stack compared to one with a downward-growing C stack? What is the difference between the two cases in terms of potential storage fragmentation?

13#

Chapter 11: Storage Management


PCE*PCCTI'C@ The implementation of storage management m st accommodate a +ide range of allocation reI irements( 1t the same time, the implementation m st pro)ide generality and some compromise bet+een 2normal2 programs and those that ha)e n s al reI irements( 1ltho gh it is clearly sensible to satisfy the needs of most programs in an efficient manner, there is no +ay to define +hat is typical or to predict ho+ programming style and applications may change( Indeed, the performance of the implementation may affect both programming style and applications( *trings and bloc0s can be created d ring program e.ec tion at times that cannot be predicted, in general, from e.amination of the te.t of a program( The si7es of strings and of some types of bloc0s may )ary and may be arbitrarily large, altho gh practical considerations dictate some limits( There may be an arbitrary n mber of strings and bloc0s( The 2lifetimes2 d ring +hich they may be sed are arbitrary and are nrelated, in general, to proced re calls and ret rns( %ifferent programs )ary considerably in the n mber, type, and si7es of data ob5ects that are created at r n time( *ome programs read in strings, transform them, and +rite them o t +itho t e)er creating other types of ob5ects( 6ther programs create and manip late many lists, sets, and tables b t se fe+ strings( Eelati)ely fe+ programs se coe.pressions, b t there are applications in +hich large n mbers of co-e.pressions are created( *ince a program can constr ct an arbitrary n mber of data ob5ects of arbitrary si7es and lifetimes, some mechanism is needed to allo+ the re se of space for 2dead2 ob5ects that are no longer accessible to the program( Th s, in addition to a mechanism for allocating storage for ob5ects at r n time, there m st be a storage-reclamation mechanism, +hich s ally is called garbage collection. The methods sed for allocation and garbage collection are interdependent( *imple and fast allocation methods s ally reI ire comple. 
and time-cons ming garbage-collection techniI es, +hile more efficient garbage-collection techniI es generally lead to more comple. allocation techniI es( *torage management has infl ences that are far-reaching( In some programs, it may acco nt for a ma5or portion of e.ec tion time( The design of data str ct res, the layo t of bloc0s, and the representation of strings are all infl enced by storage-management considerations( $or e.ample, both a descriptor that points to a bloc0 and the first +ord of the bloc0 contain the same type code( This information is red ndant as far as program e.ec tion is concerned, since bloc0s are accessed only )ia descriptors that point to them( The red ndant type information is moti)ated by storage-management considerations( % ring garbage collection, it is necessary to access bloc0s directly, rather than thro gh pointers from descriptors, and it m st be possible to determine the type of a bloc0 from the bloc0 itself( *imilarly, the si7e of a bloc0 is of no interest in performing lang age operations, b t the si7e is needed d ring garbage collection( /loc0s, therefore, carry some 2o)erhead2 for storage management( This o)erhead consists primarily of e.tra space, reflecting the fact that it ta0es more space to manage storage dynamically than +o ld be needed if space +ere allocated statically( /alancing space o)erhead against efficiency in allocating and collecting ob5ects is a comple. tas0( * ch problems ha)e plag ed and intrig ed implementors since the early days of &I*P( >any +ays ha)e been de)ised to handle dynamic storage management, and some techniI es ha)e been highly refined to meet specific reI irements 3Cohen 1:G14( In the

14&

case of Icon, there is more emphasis on storage management for strings than there is in a language, such as LISP, where lists predominate. Icon's storage-management system reflects previous experience with storage-management systems used in XPL (McKeeman, Horning, and Wortman 1970), SNOBOL4 (Hanson 1977), and the earlier Ratfor implementation of Icon (Hanson 1980). The result is, of course, somewhat idiosyncratic, but it provides an interesting case study of a real storage-management system.

11.1 Memory Layout


During the execution of an Icon program, memory is divided into several regions. The sizes and locations of these regions are somewhat dependent on computer architecture and the operating system used, but typically they have the following form:
run-time system icode allocated storage free space system stack

This diagram is not dra+n to scale, some regions are m ch larger than others( The R"n.Time ! stem. The r n-time system contains the e.ec table code for the interpreter, b ilt-in operators and f nctions, s pport ro tines, and so forth( It also contains static storage for some Icon strings and bloc0s that appear in C f nctions( $or e.ample, the bloc0s for 0ey+ord trapped )ariables are statically allocated in the data area of the r ntime system( * ch bloc0s ne)er mo)e, b t their contents may change( The si7e of the r ntime system is some+hat machine-dependent( 1bo t FA,000 bytes 3decimal4 is typical( The Icode Region. 6ne of the first things done by the r n-time system is to read in the icode file for the program that is to be e.ec ted( The data in the icode region, +hich is prod ced by the lin0er, is di)ided into a n mber of sections@
code and blocks record information global identifier values global identifier names static identifier values strings

The first section contains virtual machine code, blocks for cset and real literals, and procedure blocks, on a per-procedure basis. Thus, the section of the icode region that contains code and blocks consists of segments of the following form for each procedure:
blocks for real literals blocks for cset literals procedure blocks virtual machine instructions

Record information for the entire program is in the second section of the icode region. Next, there is an array of descriptors for the values of the global identifiers in the program, followed by an array that contains qualifiers for the names of the global identifiers. These

141

t+o arrays are parallel( The ith descriptor in the first array contains the )al e of the ith global identifier, and the ith descriptor in the second array contains a I alifier for its name( $ollo+ing the t+o arrays related to global identifiers is an array for the )al es of static identifiers( 1s mentioned in *ec( 2(1(10, static identifiers ha)e global scope +ith respect to proced re in)ocation, b t a static identifier is accessible only to the proced re in +hich it is declared( Unli0e cset and real bloc0s, +hich are compiled on a per-proced re basis, all strings in a program are pooled and are in a single section of the icode region that follo+s the array of static identifiers( 1 literal string that occ rs more than once in a program occ rs only once in the string section of the icode region( %ata in the icode region is ne)er mo)ed, altho gh some components of it may change at r n time( The si7e of the icode region depends primarily on the si7e of the corresponding so rce program( 1s a r le of th mb, an icode region is appro.imately t+ice as large as the corresponding Icon so rce-lang age file( 1n icode file for a short program might be 1,000 bytes, +hile one for a large program 3by Icon standards4 might be 20,000 bytes( +llocated !torage. The space for data ob5ects that are constr cted at r n time is pro)ided in allocated storage regions( This portion of memory is di)ided into three parts@
static region string region block region

The static region contains co-expression blocks. The remainder of the allocated storage region is divided into strings and blocks as shown. The string region contains only characters. The block region, on the other hand, contains pointers. This leads to a number of differences in allocation and garbage-collection techniques in different regions. Data in the static region is never moved, but strings and blocks may be. Both the string and block regions may be moved if it is necessary to increase the size of the static region. Similarly, the block region may be moved in order to enlarge the string region. The initial sizes of the allocated storage regions vary considerably from computer to computer, depending on the size of the user address space. On a computer with a large address space, such as the VAX, the initial sizes are static region: 20,480 bytes (5,120 words); string region: 51,200 bytes (12,800 words); block region: 51,200 bytes (12,800 words); total: 122,880 bytes (30,720 words).

On a computer with a small address space, such as the PDP-11, the initial sizes are static region: 4,096 bytes (2,048 words); string region: 10,240 bytes (5,120 words); block region: 10,240 bytes (5,120 words); total: 24,576 bytes (12,288 words).

The user may establish different initial sizes prior to program execution by using environment variables. As indicated previously, the sizes of these regions are increased at run time if necessary, but there is no provision for decreasing the size of a region once it has been established.

142

Free Space and the System Stack. On computers with system stacks that grow downward, such as the VAX, the system stack grows toward the allocated storage region. Between the two regions is a region of free space into which the allocated storage region may grow upward. Excessive recursion in C may cause collision of the system stack and the allocated storage region. This is an unrecoverable condition, and the result is termination of program execution. Similarly, more space may be needed for allocated storage than is available. This also results in termination of program execution. In practice, the actual situation depends to a large extent on the size of the user address space, which is the total amount of memory that is available for all the regions shown previously. On a computer with a small user address space, such as the PDP-11, the amount of memory available for allocated storage is a limiting factor for some programs. Furthermore, collision of the allocated storage region and the system stack is a serious problem. On a computer that supports a large virtual memory, the size of the system stack is deliberately limited, since the total amount of memory available is so large that runaway recursion would consume enormous resources before a collision occurred between the system stack and the allocated storage region.

11.2 Allocation
*torage allocation in Icon is designed to be fast and simple( "arbage collection is some+hat more complicated as a res lt( Part of the rationale for this approach is that most Icon programs do a considerable amo nt of allocation, b t many programs ne)er do a garbage collection( =ence, programs that do not garbage collect are not penali7ed by a strategy that ma0es garbage collection more efficient at the e.pense of ma0ing allocation less efficient( The other rationale for this approach is that the storage reI irements of Icon do not readily lend themsel)es to more comple. allocation strategies( 11(2(1 The )tatic *egion %ata allocated in the static region is ne)er mo)ed, altho gh it may be freed for re se( Coe.pression bloc0s are allocated in the static region, since their C stac0s contain internal pointers that depend on both the comp ter and the C compiler and hence are diffic lt to relocate to another place in memory( $ rthermore, since co-e.pression bloc0s are all the same si7e, it is economical and simple to free and re se their space( The C library ro tines malloc and free are sed to allocate and free co-e.pression bloc0s in the static region( These ro tines maintain a list of bloc0s of free space( The ro tine malloc finds a bloc0 of the reI ested si7e, di)iding a larger bloc0 if necessary, and re)ises the free list accordingly( The ro tine free ret rns the freed space to the free list, coalescing it +ith ad5acent free bloc0s if possible( *ee 8ernighan and Eitchie 1:FG for a disc ssion of freelist allocation( Icon contains its o+n )ersion of these ro tines to ass re that space is allocated in its o+n static region and to allo+ its o)erall memory region to be e.panded +itho t conflict +ith other sers of malloc( Th s, if a ser e.tension to Icon or the operating system calls malloc, IconJs o+n ro tine handles the reI est( This means that the static region may contain space allocated for data other than coe.pression bloc0s, altho gh this normally is not the case(

143

11.2.2 Blocks For other kinds of blocks, Icon takes advantage of the fact that its own data can be relocated if necessary and uses a very simple allocation technique. The allocated region for blocks is divided into two parts:

9hen there is a reI est for a bloc0 of n bytes, the free pointer, bl0free, is incremented by n and the pre)io s )al e of the free pointer is ret rned as the location of the ne+ly allocated bloc0( This process is fast and free of the comple.ities of the free-list approach( #ote that this techniI e really amo nts to a free list +ith only one bloc0( The problem of reclaiming fragmented space on the free list is e.changed for the process of reclaiming n sed bloc0s and rearranging the bloc0 region so that all the free space is in one contig o s portion of the bloc0 region( This is done d ring garbage collection( 11(2(3 )trings There is e)en less 5 stification for a free-list approach for allocating strings( 1 ne+ly created string may be one character long or it may be tho sands of characters long( $ rthermore, +hile there is space in bloc0s that can be sed to lin0 together free storage, there is no s ch space in strings, and a free list +o ld in)ol)e additional storage( Instead, the string region is allocated in the same +ay that the bloc0 region is allocated@

As with the block region, a garbage collection is performed if there is not enough space in the string region to satisfy an allocation request.

11.3 Garbage Collection


Allocation is simple, but garbage collection is not. The primary purpose of garbage collection is to reclaim the space occupied by "dead" objects that are not needed for subsequent program execution, so that this space can be reallocated. This means different things in different regions. In the static region, it means freeing dead co-expression blocks.

144

In the string and block regions, it involves moving the space for dead objects from the allocated portion of the region to the free portion. This is considerably more complicated than adding a pointer to a free list. Since all free space must be in a single block in these regions, "live" objects must be moved to fill in the holes left by dead ones. This is done by compacting the allocated portion of these regions, relocating live objects toward the beginning of these regions and squeezing out dead objects. In turn, pointers to live objects have to be adjusted to correspond to their new locations. There are two phases in garbage collection:

Location of live objects and all the pointers to them. Compaction of live objects and adjustment of the pointers to them.

2"arbage collection2 is some+hat of a misnomer, since the process is oriented to+ard sa)ing 2non-garbage2 ob5ects, garbage disappears as a byprod ct of this operation( 11(3(1 The 0asis The challenging problem for garbage collection is the location of ob5ects that ha)e to be sa)ed, as +ell as all pointers to them( 1n ob5ect is dead, by definition, if it cannot be accessed by any f t re so rce-lang age comp tation( Con)ersely, by definition, an ob5ect is li)e if it can be accessed( ConseI ently, the important iss e is the possibility of comp tational access( $or e.ample, it is al+ays possible to access the )al e of Ls b5ect, and this )al e m st be preser)ed by garbage collection( 6n the other hand, in
a := [1,2,3] a := list(10)

after the e.ec tion of the second assignment, the first list assigned to a is inaccessible and can be collected( It is essential to sa)e any ob5ect that may be accessed, b t there is no +ay, in general, to 0no+ if a specific ob5ect will be accessed( $or e.ample, a comp tational path may depend on factors that are e.ternal to the program, s ch as the )al e of data that is read from a file( It does comparati)ely little harm to sa)e an ob5ect that might be accessed b t, in fact, ne)er is( *ome storage is +asted, b t it is li0ely to be reclaimed d ring a s bseI ent collection( It is a serio s error, on the other hand, to discard an ob5ect that s bseI ently is accessed( In the first place, the former )al e of s ch an ob5ect s ally is o)er+ritten and hence is 2garbage2 if it is s bseI ently accessed( $ rthermore, accessing s ch an ob5ect may o)er+rite another accessible ob5ect that no+ occ pies the space for the former one( The effects may range from incorrect comp tational res lts to addressing )iolations( The so rces of s ch errors also are hard to locate, since they may not be manifested ntil considerably later d ring e.ec tion and in a conte.t that is nrelated to the real ca se of the problem( ConseI ently, it is important to be conser)ati)e and to err, if at all, on the side of sa)ing ob5ects +hose s bseI ent accessibility is I estionable( #ote that it is not only necessary to locate all accessible ob5ects, b t it is also necessary to locate all pointers to ob5ects that may be relocated( The location phase starts +ith a basis that consists of descriptors that point to ob5ects that may be accessible and from +hich other ob5ects may be accessed( $or e.ample, Ls b5ect is in the basis( The precise content of the basis is partly a conseI ence of properties of the Icon lang age and partly a conseI ence of the +ay the r n-time system is implemented( The basis consists of the follo+ing descriptors@

&main (co-expression block for the initial call of main)

145

current co-expression block values of global identifiers values of static identifiers &subject saved values of map arguments tended descriptors

The tended descriptors provide temporary storage for a run-time support routine in which a garbage collection may occur. See Sec. 12.2.2. Not all objects that have to be saved are pointed to directly by the basis. The value of a local identifier on the interpreter stack may point to a list-header block that in turn points to a list-element block that contains elements pointing to strings and other blocks. Pointer chains also can be circular. 11.3.2 The Location Phase For historical reasons, the location phase is sometimes called marking. This term refers to the common practice of setting an identifying bit in objects that have been located. Not all such processes actually change the objects that are located. The way that this is done in Icon depends on the region in which an object is located. During the location phase, every descriptor in the basis is examined. A descriptor is of interest only if it is a qualifier or if its v-word contains a pointer (that is, if its d-word contains a p flag). For a pointer dp to a descriptor, the following checks are performed:
if (Qual(*dp)) postqual(dp); else if (Pointer(*dp)) markblock(dp);

where the macro Pointer(d) tests the d-word of d for a p flag. Strings. The routine postqual first checks that the v-word of the qualifier points to a string in the allocated string region, since strings in other parts of memory are not of interest during garbage collection. If the string is in the allocated string region, a pointer to the qualifier is placed in an array:
/*
 * postqual - note a qualifier found during the location phase.
 *
 *  dp  pointer to the qualifier being examined.
 *
 *  If the string that *dp refers to lies in [strbase, strend), dp is
 *  stored at qualfree and qualfree is advanced; otherwise nothing is done.
 */
postqual(dp)
struct descrip *dp;
{
   /* Ignore strings that lie outside the strbase..strend range. */
   if (StrLoc(*dp) < strbase || StrLoc(*dp) >= strend)
      return;

   *qualfree = dp;
   qualfree++;
}

The array quallist is empty when garbage collection begins. Its size is checked before a pointer is added to it, and more space is obtained if it is needed, although the code for doing that is not shown here. See Sec. 11.3.6. The pointers that accumulate in quallist during the marking phase provide the information necessary to determine the portion of the allocated string region that is in use. In addition, these pointers point to all the qualifiers whose v-words must be adjusted when the strings they point to are moved during the compaction of the string region. See Sec. 11.3.3.

14

Blocks. The location phase for blocks is more complicated than that for strings, since blocks can contain descriptors that point to strings as well as to other blocks. The objects that these descriptors point to must be processed also. Unlike strings, in which a separate array is used to keep track of qualifiers that have been located, no extra space is needed to keep track of descriptors that point to blocks. Instead, descriptors and the titles of the blocks they point to are modified temporarily. The title of any block located in the allocated block region is changed to point to a back chain that contains all the descriptors that point to that block. The descriptors are linked through their v-words. The following example illustrates the process. Suppose there is a record declaration
record term(value, code, count)

and that the following expressions are evaluated:


x := term("chair", "noun",4) y := x

The values of x, y, and the block they point to are related as follows:

Suppose that the descriptor containing the value of x is processed during the location phase before the descriptor containing the value of y. This descriptor is identified as pointing to a block in the allocated block region by virtue of the p flag in its d-word and an address range check on the value of its v-word. The back chain is established by setting the title word of the block to point to the descriptor and setting the v-word of the descriptor to hold the previous contents of the title word. The result is

14!

The title +ord of the bloc0 no+ points to the descriptor that pre)io sly pointed to the bloc0( This change is re)ersible, and prior to the completion of the garbage collection process the pre)io s relationship is restored( 1 cr cial b t some+hat s btle aspect of the change is that it is no+ possible to tell that the bloc0 has been mar0ed( The n merical magnit de of the )al e of its title +ord is greater than that of any type code, since all descriptors in the r n-time system are at memory locations +hose addresses are larger than the largest type code( The descriptors in the record bloc0 no+ are processed in the same +ay as descriptors in the basis( In order to do this, it is necessary to 0no+ +here descriptors are located in the bloc0( *ince bloc0s in the allocated bloc0 region are organi7ed so that all descriptors follo+ all non-descriptor data, it is only necessary to 0no+ +here the first descriptor is and ho+ large the bloc0 is( These )al es are determined sing t+o arrays that ha)e entries for each type code( The first array, bsi7es, pro)ides the information that is needed to determine bloc0 si7es( There are three 0inds of entries( 1n entry of -1 indicates a type for +hich there is no bloc0 or for +hich the bloc0s are not in the allocated bloc0 region( C.amples are TV# ll and TVCoe.pr( 1n entry of 0 indicates that the si7e of the bloc0 follo+s the bloc0 title( This is the case for records( 1ny other entry is the act al si7e of the bloc0 in bytes( $or e.ample, the entry in bsi7es for TV&ist is 2< on a 32-bit comp ter( The second array, firstd, is sed to determine the byte offset of the first descriptor in the bloc0( 1s +ith bsi7es, a )al e of -1 indicates a type for +hich there are no associated bloc0s in the allocated bloc0 region( 1 )al e of 0 indicates that there are no descriptors in the bloc0( C.amples are TVCset and TVEeal( $or TVEecord, the entry is G for 32-bit comp ters, indicating that the first descriptor is at an offset of G bytes 32 +ords4 from the 
beginning of the bloc0( *ee *ec( <(2( $or the pre)io s e.ample, after the descriptors in the record bloc0 are processed, the location phase contin es( 9hen the descriptor that contains the )al e of y is processed, it is added to the bac0 chain by again e.changing the contents of its )-+ord +ith the contents of the title of the bloc0( 1s a res lt, the title of the bloc0 points to the descriptor for the )al e of y and its )-+ord points to the descriptor for the )al e of .@

Since the title of the block that y points to is marked, the descriptors in it are not processed. This prevents descriptors from being processed twice and also prevents the marking phase from looping in case there are pointer loops among blocks.

14"

If a variable descriptor is encountered when processing descriptors whose d-words contain p flags, the value the variable points to belongs to one of the following categories:

trapped-variable block global or static identifier argument or local identifier descriptor in a structure

A trapped variable, indicated by a t flag in its d-word, points to a block and is processed like any other descriptor that points to a block. The values of global and static identifiers are in the basis and are processed separately. The values of arguments and local identifiers are on an interpreter stack and are processed when its co-expression block is processed. A variable descriptor that points to a descriptor in a structure points within a block, not to the title of a block. This is the only case in which the offset, which is contained in the least-significant portion of the d-word of a non-trapped-variable descriptor, is nonzero. Consequently, this offset is used to distinguish such variables from those in the second and third categories. Continuing the previous example, suppose that a garbage collection is triggered by evaluation of the expression
x.count := read()

At the beginning of garbage collection, there is a variable descriptor for the field reference that points to the record block in addition to the descriptors for the values of x and y. If the values of x and y are processed first as described previously, the situation when the variable descriptor is encountered is

Note that the offset in the d-word of the variable descriptor is in words, not bytes. The offset, converted to bytes, is added to the v-word in the variable descriptor, and this descriptor is linked into the back chain.

14#

9hen the location phase is complete, the title of each bloc0 in the allocated bloc0 region that m st be sa)ed points to a chain of all the descriptors that originally pointed to it( This pro)ides the necessary information to ad5 st the )-+ords of these descriptors to acco nt for the relocation of the bloc0 d ring the compaction phase( *ee *ec( 11(3(3( If a descriptor that points to a co-e.pression bloc0 is enco ntered d ring the location phase, the title of the co-e.pression bloc0 is mar0ed and the descriptors in the coe.pression bloc0 are processed in a fashion similar to that for bloc0s in the allocated bloc0 region( *ince co-e.pression bloc0s are ne)er mo)ed, it is not necessary to 0eep trac0 of descriptors that point to them( To mar0 the title, it is s fficient to change it to a )al e that is larger than any type code( The acti)ator of the co-e.pression 3if any4 is processed li0e any other co-e.pression bloc0( *imilarly, the refresh bloc0 that is pointed to from the co-e.pression bloc0 m st be processed li0e any other bloc0( The rest of the descriptors associated +ith a co-e.pression are in its interpreter stac0( =ere the sit ation is more complicated than it is +ith bloc0s in the allocated bloc0 region, since interpreter stac0s contain frame mar0ers in addition to descriptors( %espite this, all the descriptors, and only the descriptors, on an interpreter stac0 m st be processed( Interpreter stac0s are processed by the ro tine s+eep, +hich starts at sp for the stac0 and +or0s to+ard the stac0 base( %escriptors are processed ntil the ne.t frame mar0er is enco ntered, at +hich point, depending on the type of the frame, the mar0er is s0ipped and ne+ frame pointers are set p from it( The ro tine for mar0ing bloc0s is
markblock(dp) struct descrip *dp; { register struct descrip *dp1; register char *endblock, *block; static word type, fdesc, off; /* * Get the block to which dp points. */ block = (char *)BlkLoc(*dp); if (block >= blkbase && block < blkfree) { /* check range */ if (Var(*dp) && !Tvar(*dp)) {

15&

/* * The descriptor is a variable; point block to the head * of the block containing the descriptor to which dp * points. */ off = Offset(*dp); if (off == 0) return; else block = (char *)word *)block -off); } type = BlkType(block); if ((uword)type <= MaxType) { /* * The type is valid, which indicates that this block * has not been marked. Point endblock to the byte past * the end of the block. */ endblock = block + BlkSize(block); } /* * Add dp to the back chain for the block and point the * block (via the type field) to dp. */ BlkLoc(*dp) = (union block *)type; BlkType(block) = (word)dp; if (((unsigned)type <= MaxType) && ((fdesc = firstd[type]) >0)) /* * The block has not been marked, and it does contain * descriptors. Mark each descriptor. */ for (dp1 = (struct descrip *)(block + fdesc); (char *) dp1 < endblock; dp1++) { if (Qual(*dp1)) postqual(dp1 ); else if (Pointer(*dp1) markblock(dp1 ); } } else if (dp->dword == D_Coexpr && (unsigned)BlkType(block) <= MaxType) { /* * dp points to a co-expression block that has not been * marked. Point the block to dp. Sweep the interpreter * stack in the block. Then mark the block for the * activating co-expression and the refresh block. */ BlkType(block) = (word)dp; sweep((struct b_coexpr *)block); markblock(&((struct b_coexpr *)block)->activator); markblock(&((struct b_coexpr *)block)->freshblk); } }

The macro BlkType(cp) produces the type code of the block pointed to by cp. The macro BlkSize(cp) uses the array bsizes to determine the size of the block pointed to by cp.

151

11(3(3 Pointer 2d9ustment and Compaction !trings. 9hen the location phase is complete, I allist contains a list pointers to all the I alifiers +hose )-+ords point to the allocated string region( $or e.ample, s ppose that the allocated string region at the beginning of a garbage collection is (((#ecessity is the mother of strange bedfello+s((( S S S

strbase N<00 strfree * ppose also that the follo+ing I alifiers reference the allocated string region@

The pointers to the allocated string region are (((#ecessity is the mother of strange bedfello+s ((( S S S S S S

strbase N<00 N<1A N<20

N<30 strfree

Note that the qualifiers point to overlapping strings. After the location phase, quallist might contain the following pointers:

152

The order of the pointers in I allist depends on the order in +hich the I alifiers are processed@ there is no necessary relationship bet+een the order of the pointers in I allist and the order of the pointers to the allocated string region( 1t the beginning of the pointer-ad5 stment phase of garbage collection, the array I allist is sorted in non-decreasing order by the )-+ords in I alifiers that are pointed to from I allist( This allo+s the pointers to the allocated string region to be processed in nondecreasing order so that the portions of the allocated string region that m st be sa)ed and compacted can be determined( Contin ing the pre)io s e.ample, I allist becomes

The )-+ords of the I alifiers in the order of the pointers in I al!ist no+ are strbaseN<00 strbaseN<00 strbaseN<00 strbaseN<1A strbaseN<20 strbaseN<30 strbaseN<30 *ince I alifiers may reference o)erlapping strings, care m st be ta0en to identify contig o s 2cl mps2 of characters that may be shared by I alifiers( The pointers in I allist are processed in order( Three pointers in the string region are maintained@ dest, the ne.t free destination for a cl mp of characters to be sa)ed, so rce, the start of the ne.t cl mp, and cend, the end character in the c rrent cl mp( 9hen a I alifier that is pointed to from I allist is processed, the first I estion is +hether its )-+ord addresses a character that is beyond the end of the c rrent cl mp 3since )+ords are processed in n merical order, the address is either in the c rrent cl mp or beyond the end of it4( If it is in the c rrent cl mp, cend is pdated, pro)ided the last character of the c rrent I alifier is beyond cend( If it is not in the c rrent cl mp, the cl mp is mo)ed from so rce to dest( In either case, the )-+ord of the c rrent I alifier is ad5 sted 3dest -so rce is added to it4( In the pre)io s e.ample, the allocated string region after collection is U is the m of strange fel SS strbase strfree and the se)en I alifiers that point to it are

153

The routine for compacting the allocated string region and adjusting pointers to it is
scollect(extra) word extra; { register char *source, *dest; register struct descrip **qptr; char *cend; extern int alcmp(); if (qua/free <= quallist) { /* * There are no accessible strings. Thus, there are * none to collect and the whole string space is free. */ strfree = strbase; return; } /* * Sort the pointers on quallist in ascending order of * string locations. */ qsort(quallist, qualfree-quallist, sizeof(struct descrip *), qlcmp); /* * The string qualifiers are now ordered by starting * location. */ dest = strbase; source = cend = StrLoc(**quallist); /* * Loop through qualifiers for accessible strings. */

154

for (qptr = quallist; qptr < qualfree; qptr++) { if (StrLoc(**qptr) > cend) { /* * qptr points to a qualifier for a string in the * next clump. The last clump is moved. and source * and cend are set for the next clump. */ while (source < cend) *dest++ = *source++; source = cend = Strl oc(**qotr). if (Strloc(**qptr)+Strlen(**qptr) > cend) /* * qptr is a qualifier for a string in this clump; * extend the clump. */ cend = Strloc(**qptr) + Strlen(**qptr); /* * Relocate the string qualifier. */ StrLoc(**qptr) += dest -source + extra; } /* * Move the last clump. */ while (source < cend) *dest++ = *source++; strfree = dest; }

The argument extra provides an offset in case the string region is moved. See Sec. 11.3.5. Sorting is done by the C library routine qsort, whose fourth argument is a routine that performs the comparison
qlcmp(q1, q2) struct descrip //q1, //q2$ return (int)(@tr4oc(//q1) 7@trloc(//q2))%

(locks( 1fter the location phase, some bloc0s in the allocated bloc0 region are mar0ed and others are not( In the follo+ing typical sit ation, the hori7ontal lines delimit bloc0s, gray areas indicate mar0ed bloc0s, and clear areas indicate nmar0ed bloc0s@

155

In the allocated bloc0 region, pointer ad5 stment and compaction are done in t+o linear passes o)er the region bet+een bl0base and bl0free( In the first pass, t+o pointers are J sed, dest and so rce( dest points to +here the ne.t bloc0 +ill be after bloc0s are mo)ed in the ne.t pass, +hile so rce points to the ne.t bloc0 to be processed( /oth dest and so rce start at bl0base, pointing to the first allocated bloc0( % ring this pass, the title of each bloc0 pointed to by so rce is e.amined( If it is not mar0ed 3that is, if it is not larger than the ma.im m type code4, dest is left nchanged and so rce is incremented by the si7e of the bloc0 to get to the title of the ne.t bloc0( Th s, nmar0ed bloc0s are s0ipped( The array bsi7es is sed, as before, to determine bloc0 si7es( If the title of the bloc0 pointed to by so rce is mar0ed, its bac0 chain of descriptors is processed, changing their )-+ords to point to +here dest points( In the case of a )ariable descriptor that is not a trapped-)ariable descriptor, the offset in its d-+ord is added to its )-+ord, so that it points to the appropriate relati)e position +ith respect to dest( The last descriptor in the bac0 chain is identified by the fact that its )-+ord contains a type code 3a )al e smaller than any possible pointer to the allocated bloc0 region4( This type code is restored to the title of the bloc0 before the )-+ord is changed to point to the destination( 1n m flag is set in the title to disting ish it as a mar0ed bloc0, since the former mar0ing method no longer applies, b t the compaction phase needs to determine +hich bloc0s are to be mo)ed( 1fter the bac0 chain has been processed, all descriptors that point to the bloc0 no+ point to +here the bloc0 will be +hen it is mo)ed d ring the compaction phase( The bloc0 itself is not mo)ed at this time( This is ill strated by the e.ample gi)en pre)io sly, in +hich three descriptors point to a record bloc0( 1fter mar0ing( the sit ation is

15

After processing the back chain, the situation is

Note that the v-words of the descriptors point to where the block will be after it is moved. The routine for adjusting pointers to the allocated block region is
adjust(source, dest) char *source, *dest; { register struct descrip *nxtotr, *totr; /* * Loop through to the end of allocated block region, moving * source to each block in turn and using the size of a block * to find the next block. */ while (source < blkfree) { if ((uword)(nxtptr = (struct descrip *)BlkType(source)) > MaxType) { /* * The type field of source is a back pointer. Traverse * the chain of back pointers, changing each block * location from source to dest. */ while ((uword)nxtptr > MaxType) { tptr = nxtptr; nxtptr = (struct descrip *)BlkLoc(*nxtptr); if (Var(*tptr) && !Tvar(*tptr)) BlkLoc(*tptr) = (union block *)((word *)dest + Offset(*tptr)); else

15!

BlkLoc(*tptr) = (union block *)dest; } BlkType(source) = (uword)nxtptr I F _Mark; dest += BlkSize(source); } source += BlkSize(source); } }

When the pointer-adjustment phase is complete, the blocks can be moved. At this time, all the block titles contain type codes, and those that are to be saved are marked by m flags. During the compaction phase, these pointers are used again to reference the destination and source of blocks to be moved. If an unmarked block is encountered, source is incremented by the block size, skipping over the block. If a marked block is encountered, the m flag in its title is removed and the block is copied to dest. Then dest and source are incremented by the size of the block. When blkfree is reached, it is set to dest. At this point the allocated block region has been compacted. All saved blocks are before blkfree, and all free space is after it. The pointers that were adjusted now point to their blocks, and the relative situation is the same as it was before garbage collection. The routine for compacting the allocated block region is
compact(source) char *source; { register char *dest; register word size; /* * Start dest at source. */ dest = source; /* * Loop through to end of allocated block space, moving * source to each block in turn, using the size of a block to * find the block. If a block has been marked, it is copied to * the location pointed to by dest and dest is pointed past * the end of the block, which is the location to place the * next saved block. Marks are removed from the saved blocks. */ while (source < blkfree) ( size = BlkSize(source); if (BlkType(source) & F_Mark) { BlkType(source) &= -F_Mark; if (source != dest) mvc((uword)size, source, dest); dest += size; } source += size; } /* * dest is the location of the next free block. Now that * compaction is complete, point blkfree to that location. */ blkfree = dest;

15"

The routine mvc(n, source, dest) moves n bytes from source to dest.

11.3.4 Collecting Co-Expression Blocks

After the location phase of garbage collection is complete, all the live co-expression blocks are marked, but the dead co-expression blocks are not. It is a simple matter to process the list of co-expression blocks, which are linked by pointers, calling free to deallocate dead ones and at the same time removing them from the list. For live co-expressions, the type code in the title is restored. The routine cofree that frees co-expression blocks is
/*
 * cofree - free the co-expression blocks that were not marked during the
 *  location phase and restore the type code in the title of those that
 *  were.
 */
cofree()
   {
   register struct b_coexpr **ep, *xep;
   extern int mstksize;		/* main stack size */

   /*
    * Reset the type for &main.
    */
   BlkLoc(k_main)->coexpr.title = T_Coexpr;

   /*
    * The co-expression blocks are linked together through their
    *  nextstk fields, with stklist pointing to the head of the
    *  list.  The list is traversed and each stack that was not
    *  marked is freed.
    */
   ep = &stklist;
   while (*ep != NULL) {
      if (BlkType(*ep) == T_Coexpr) {
         /*
          * The title still holds the type code, so the block was never
          *  marked: unlink it from the list and release it.
          */
         xep = *ep;
         *ep = (*ep)->nextstk;
         free((char *)xep);
         }
      else {
         /*
          * The block was marked; restore its type code and move on.
          */
         BlkType(*ep) = T_Coexpr;
         ep = &(*ep)->nextstk;
         }
      }
   }

11(3(5 4xpansion of the 2llocated *egions "arbage collection may not prod ce eno gh free space in a region to satisfy the reI est that ca sed the garbage collection( In this case, the region for +hich the reI est +as made is e.panded( In addition, the allocated string and bloc0 regions are e.panded if the amo nt of free space in them after garbage collection other+ise +o ld be less than a minim m )al e, +hich is called 2breathing room(2 This e.pansion attempts to a)oid 2thrashing2 that might res lt from a garbage collection that lea)es a small amo nt of free space, only to res lt in a s bseI ent garbage collection almost immediately( *ince the allocated bloc0 region is at the end of the memory space for Icon, its e.pansion only in)ol)es calling the C library ro tine, sbr0, +hich e.pands the serJs memory space( If this e.pansion fails, program e.ec tion is terminated +ith an error message( If the allocated string region is e.panded, ho+e)er, the allocated bloc0 region m st be relocated to ma0e room( Eelocating the bloc0 region reI ires relocating all pointers to it( #o e.tra +or0 is needed to do this, ho+e)er( The relocation is accomplished by specifying

15#

the ne+ location of the bloc0 region rather than the c rrent bl0base as the second arg ment to ad5 st( ConseI ently, the ad5 sted pointers point to locations +here bloc0s +ill be +hen they are mo)ed at the end of garbage collection( If the static region is e.panded, both the allocated string region and the allocated bloc0 region m st be relocated( The amo nt of the relocation for the allocated bloc0 region simply affects the second arg ment to ad5 st, as indicated pre)io sly( In the allocated string region, the amo nt is passed as the arg ment to scollect and is added to the )-+ords of the I alifiers pointed to from I allist, as indicated in *ec( 11(3(3( 11(3( )torage *e:uirements during ;ar-age Collection "arbage collection itself ta0es some +or0 space( *pace for pointers to I alifiers is pro)ided in I allist, +hile stac0 space is needed for calls to ro tine that perform the )ario s aspects of garbage collection( The space for I allist is obtained from the free space at the end of the allocated bloc0 region( The amo nt of space needed is proportional to the n mber of I alifiers +hose )+ords point to strings in the allocated string region and s ally is comparati)ely small( *pace for I allist is obtained in small increments This is done in postI al, for +hich the complete ro tine is
postqual( dp) struct descrip /dp ; $ extern c*ar /brk()- extern c*ar /sbrk()if (StrLoc(*dp) >= strbase && StrLoc(*dp) < strend) { /* * The string is in the string space. Add it to the string * qualifier list, but before adding it, expand the string * qualifier list if necessary. */ if (qualfree >= equallist) { equallist += Sqlinc; if ((int) brk( equallist) == -1) runerr(303, NULL); /* terminate if expansion fails */ currend = sbrk(0); } *qualfree++ = dp; } }

The amount of stack space required during garbage collection depends primarily on the depth of recursion in calls to markblock; this is the only place in the garbage collection where recursion occurs. Recursion in markblock corresponds to linked lists of pointers in allocated storage. It occurs where a descriptor in the static region or the allocated block region points to an as-yet unmarked block. C stack overflow may occur during garbage collection. This problem is particularly serious on computers with small address spaces for programs that use a large amount of allocated data. The use of stack space during marking is minimized by testing descriptor v-words before calling markblock, by using static storage for variables in markblock that are not needed in recursive calls, and by incorporating the code for processing co-expression blocks in markblock, rather than calling a separate routine.

1 &

11.4 Predictive Need


In most systems that manage allocated storage dynamically, garbage collections are triggered by allocation reI ests that cannot be satisfied by the amo nt of free storage that remains( In these systems, garbage collections occ r d ring calls to allocation ro tines( 9hene)er a garbage collection occ rs, all potentially accessible data m st be reachable from the basis, and any descriptors that are reachable from the basis m st contain )alid data( These reI irements pose serio s diffic lties, since, in the normal co rse of comp tation, pointers to accessible ob5ects may only e.ist in registers or on the C stac0 as C local )ariabIes that the garbage collector has no +ay of locating( $ rthermore, descriptors that are being constr cted may temporarily hold in)alid data( 9hile it is helpf l to 0no+ that garbage collection can occ r only d ring calls to allocation ro tines, allocation often is done in the midst of other comp tations( 1ss ring that all accessible data is reachable and that all reachable data is )alid can be diffic lt and prone to error( $or these reasons, Icon ses a slightly different strategy, called 2predicti)e need,2 for triggering garbage collections( Instead of garbage collection occ ring as a byprod ct of an allocation reI est, the amo nt of space needed is reI ested in ad)ance( There are t+o ro tines, bl0reI and strreI, for reser)ing space in ad)ance( These ro tines chec0 the bloc0 and string regions, respecti)ely, to ass re the amo nt of free space needed is act ally a)ailable( If it is not, they call the garbage collector( $or e.ample, strreI is
/*
 * strreq - predictive-need request: make sure n bytes are free in the
 *  allocated string region, calling the garbage collector if they are
 *  not.  The amount requested is recorded in strneed first.
 */
strreq(uword n)
   {
   strneed = n;

   if (strend - strfree < n) {
      /* not enough free string space: collect now */
      collect();
      }
   }

The amount of space requested is saved in the global variable strneed. Since the space requested may be allocated in pieces, this global variable is decremented when space is allocated:
/*
 * alcstr - allocate slen bytes at the free end of the allocated string
 *  region and, if s is non-null, copy slen bytes from s into them.
 *
 *  Returns a pointer to the start of the allocated space.  The space is
 *  expected to be available already; if it is not, syserr is called.
 */
char *alcstr(char *s, word slen)
   {
   register char *d;
   char *ofree;

   /*
    * See if there is enough room in the string region.
    */
   if (strfree + slen > strend)
      syserr("string allocation botch");
   /* this much of the outstanding predictive-need request is now used */
   strneed -= slen;

   /*
    * Copy the string into the string space, saving a pointer to
    *  its beginning.  Note that s may be null, in which case the
    *  space is still to be allocated but nothing is to be copied
    *  into it.
    */
   ofree = d = strfree;
   if (s) {
      while (slen-- > 0)
         *d++ = *s++;
      }
   else
      d += slen;

   strfree = d;
   return ofree;
   }

If a garbage collection occurs, the values of strneed and a similar variable for the allocated block region are checked to be sure that enough space is collected to satisfy any remaining allocation requests. Since a predictive need request assures an adequate amount of space, no garbage collection can occur during the subsequent allocation request. The advantage of having a garbage collection occur during a predictive need request rather than during an allocation request is that a safe time can be chosen for a possible garbage collection. The amount of space needed (or at least an upper bound on it) usually is known before the storage is actually needed, and when all valid data can be located from the basis. The function repl provides an example:
/*
 * repl(s, n) - produce the string s replicated n times.
 *
 *  Arg1 is converted to a string and Arg2 to a non-negative integer; a
 *  run-time error is raised if either conversion fails, if the count is
 *  negative or too large, or if the result would exceed MaxStrLen.
 */
FncDcl(repl,2)
   {
   register char *sloc;
   register int cnt;
   long len;
   char sbuf[MaxCvtLen];
   extern char *alcstr();

   /*
    * Make sure that Arg1 is a string.
    */
   if (cvstr(&Arg1, sbuf) == CvtFail)
      runerr(103, &Arg1);

   /*
    * Make sure that Arg2 is an integer.
    */
   switch (cvint(&Arg2, &len)) {
      /*
       * Make sure count is not negative.
       */
      case T_Integer:
         if ((cnt = (int)len) >= 0)
            break;
         runerr(205, &Arg2);
      case T_Long:
         runerr(205, &Arg2);
      default:
         runerr(101, &Arg2);
      }

   /*
    * Make sure the resulting string will not be too long.
    */
   if ((len * StrLen(Arg1)) > MaxStrLen)
      runerr(205, NULL);

   /*
    * Return an empty string if Arg2 is 0.
    */
   if (cnt == 0)
      Arg0 = emptystr;
   else {
      /*
       * Ensure enough space for the replicated string, then allocate
       *  the first copy of s, remembering where it starts.  The
       *  remaining cnt - 1 copies are allocated directly after it, so
       *  together they form the result.
       */
      strreq(cnt * StrLen(Arg1));
      sloc = alcstr(StrLoc(Arg1), StrLen(Arg1));
      cnt--;
      while (cnt--)
         alcstr(StrLoc(Arg1), StrLen(Arg1));

      /*
       * Make Arg0 a descriptor for the replicated string.
       */
      StrLen(Arg0) = (int)len * StrLen(Arg1);
      StrLoc(Arg0) = sloc;
      }
   Return;
   }

1 disad)antage of predicti)e need is that the ma.im m amo nt of storage needed m st be determined and care m st be ta0en to ma0e predicti)e need reI ests prior to allocation( These problems do not occ r in storage-management systems +here garbage collection is implicit in allocation( ECTE6*PCCTI'C@ *torage management is one of the ma5or concerns in the implementation of a r n-time system in +hich space is allocated dynamically and a tomatically( 1ltho gh many programs ne)er garbage collect at all, for those that do, the cost of garbage collection may be significant( The reI irements of storage management ha)e a significant infl ence on the +ay that data is represented in Icon, partic larly in bloc0s( 1spects of data representation that may appear arbitrary in the absence of considerations related to storage management ha)e definite ses d ring garbage collection( The garbage collector can only identify a pointer by )irt e of the fact that it is contained in the )-+ord of a descriptor( ConseI ently, t+o +ords are reI ired for all sit ations in +hich there may be a pointer to a li)e ob5ect, e)en if this pointer has no representation as a so rce-lang age data ob5ect( $or e.ample, pointers to list-element bloc0s are t+ice as large as they +o ld need to be 5 st to reference list-element bloc0s( 9hile it is possible to de)ise more economical methods of representing s ch data at the e.pense of comple.ity and loss of generality, any method of representing data for +hich space is allocated a tomatically has some o)erhead( "arbage collection is most e.pensi)e +hen there are many li)e ob5ects that m st be sa)ed( $or programs in +hich allocated storage is sed transiently and in +hich there are fe+ li)e ob5ects, garbage collection is fast( 454*CI)4) 11(1 *ince the first +ord of a bloc0 contains its type code, +hy is there also a type code in a descriptor that points to it? 
11(2 "i)e an e.ample of an Icon e.pression that changes the contents of a bloc0 that is allocated statically in the r n-time system( 11(3 "i)e an e.ample of an Icon e.pression that changes data in the icode region(

1 3

11(< 9hy not combine global and static identifiers in a single array of descriptors in the icode region? 11(A 9hy are the names of global identifiers needed? 11(B 9hy is there no array for the names of static identifiers? 11(F =o+ long can a string be? 11(G =o+ many elements can a single list-element bloc0 hold? 11(: &ist all the regions of memory in +hich the follo+ing Icon data ob5ects can occ r@ strings descriptors co-e.pression bloc0s other bloc0s 11(10 &ist all the so rce-lang age operations in Icon that may ca se the allocation of storage( 11(11 "i)e an e.ample of an e.pression for +hich it cannot be determined from the e.pression itself +hether or not it allocates storage( 11(12 &ist the bloc0 types for +hich bloc0 si7e may )ary from one bloc0 to another( 11(13 &ist all the types of bloc0s that may occ r in the allocated bloc0 region( 11(1< &ist all the types of bloc0s that may occ r o tside of the allocated bloc0 region( 11(1A "i)e an e.ample of an Icon program in +hich the only access path to a li)e ob5ect d ring garbage collection is a )ariable that points to an element in a str ct re( 11(1B "i)e an e.ample of an Icon program that constr cts a circ lar pointer chain( 11(1F C.plain ho+ it can be ass red that all bloc0s in the allocated bloc0 region are at addresses that are larger than the ma.im m type code( 11(1G 1side from the possibility of looping in the location phase of garbage collection, +hat are the possible conseI ences of processing the descriptors in a bloc0 more than once? 11(1: 9hat +o ld happen if there +ere more than one pointer on I allist to the same I alifier? 11(20 /eca se of the +ay that the Icon r n-time system is +ritten, bloc0s that are not in the allocated bloc0 region do not contain pointers to allocated ob5ects( ConseI ently, the descriptors in s ch bloc0s do not ha)e to be processed d ring garbage collection( K 9hat does this imply abo t access to s ch bloc0s? 
K 9hat changes +o ld ha)e to be made to the garbage collector if s ch bloc0s co ld contain pointers to allocated ob5ects? 11(21 There is one e.ception to the statement in the preceding e.ercise that bloc0s that are not in the allocated data region do not contain pointers to allocated ob5ects( Identify this e.ception and e.plain ho+ it is handled d ring garbage collection( 11(22 In the allocated string region, pointer ad5 stment and compaction are done in one pass, +hile t+o passes are sed in the allocated bloc0 region( 9hy are pointer ad5 stment and compaction not done in a single pass o)er the allocated bloc0 region?

1 4

11(23 9hat +o ld be the effect of failing to remo)e the m flag from a bloc0 title d ring the compaction of the allocated bloc0 region? 11(2< If garbage collection cannot prod ce eno gh free space in the region for +hich the collection +as triggered, program e.ec tion is terminated e)en if there is e.tra space in another region( %escribe ho+ to modify the garbage collector to a)oid this problem( 11(2A 9rite a program that reI ires an arbitrarily large amo nt of space for I allist( 11(2B 9rite a program that ca ses an arbitrary amo nt of rec rsion in mar0bloc0 d ring garbage collection( 11(2F 9rite a program that prod ces an arbitrarily large amo nt of data that m st be sa)ed by garbage collection, and obser)e the res lts( 11(2G %e)ise a more sophisticated method of pre)enting thrashing in allocation and garbage collection than the fi.ed breathing-room method( 11(2: There is no mechanism to red ce the si7e of an allocated region that may be e.panded d ring one garbage collection, b t +hich has an e.cessi)e amo nt of free space after another garbage collection( %escribe ho+ to implement s ch a mechanism( 11(30 * ppose that a garbage collection co ld occ r d ring a call of any C ro tine from any other C ro tine( =o+ +o ld this complicate the +ay C ro tines are +ritten? 11(31 9hat might happen if K The amo nt of storage specified in a predicti)e need reI est +ere larger than the amo nt s bseI ently allocated? K The amo nt of storage specified in a predicti)e need reI est +ere smaller than the amo nt s bseI ently allocated? 11(32 9hen a list-element bloc0 is nlin0ed as the res lt of a pop, get, or p ll, can the space it occ pies al+ays be reclaimed by a garbage collection? 9hat are the general considerations in ans+ering I estions s ch as these? 
11(33 1 )ariable that refers to a descriptor in a bloc0 points directly to the descriptor, +ith an offset in its d-+ord to the head of the bloc0 in +hich the descriptor resides( Co ld it be the other +ay aro nd, +ith a )ariable pointing to the head of the bloc0 and an offset to the descriptor? If so, +hat are the ad)antages and disad)antages of the t+o methods? 11(3< 9hy does s+eep process an interpreter stac0 from its sp to its base, rather than the other +ay aro nd? 11(3A 1s mentioned in *ec( 11(3, all regions are collected, regardless of the region in +hich space is needed( %isc ss the pros and cons of this approach( 11(3B C)al ate the cost of sing t+o-+ord descriptors for all pointers to bloc0s, e)en +hen these pointers do not correspond to so rce-lang age )al es 3as, for e.ample, in the lin0s among list-element bloc0s4( 11(3F The need to garbage-collect bloc0s that are allocated d ring program e.ec tion significantly affects the str ct re and organi7ation of s ch bloc0s( * ppose that garbage collection +ere ne)er needed( =o+ co ld the str ct re and organi7ations of bloc0s be re)ised to sa)e space?

1 5

11(3G %isc ss the pros and cons of ha)ing different regions for allocating bloc0s of different types( 11(3: *ome e.pressions, s ch as +hile +rite3read344 res lt in a s bstantial amo nt of 2storage thro ghp t,2 e)en tho gh no space really needs to be allocated( C.plain +hy this effect cannot be a)oided in general and disc ss its impact on program performance( 11(<0 Physical memory is becoming less and less e.pensi)e, and more comp ter architect res and operating systems are pro)iding larger ser address spaces( %isc ss ho+ )ery large ser address spaces might affect allocation and garbage-collection strategies(

Chapter 12: Run-Time Support Operations


PERSPECTIVE: Several features of Icon's run-time system cannot be compartmentalized as neatly as
storage management but present significant implementation problems nonetheless. These features include type checking and conversion, dereferencing and assignment, input and output, and diagnostic facilities.

12.1 Type Checking and Conversion


Type checking is relatively straightforward in Icon. If only one type is of interest, a test of the d-word is sufficient, as in
if (Type(Arg1) != T_List) runerr(108, &Arg1);

It is necessary to test the entire d-word, since a qualifier may have a length that is the same as a type code. The d-word test takes care of this, because all descriptors that are not qualifiers have n flags. If different actions are needed for different types, a separate test is required for qualifiers, since there is no type code for strings. Selection according to type generally has the form:
if (Qual(Arg1)) /* string */ else switch (Type(Arg1) { case T_List: /* list */

The real problems lie in type con)ersion, not type chec0ing( 1t the so rce-lang age le)el, type con)ersion can occ r e.plicitly, as a res lt of type-con)ersion f nctions, s ch as string3.4, or it may be implicit( Implicit type con)ersion occ rs freI ently in many 0inds of comp tations( $or e.ample, n meric data may be read from files in the form of strings, con)erted to n mbers in arithmetic comp tations, and then con)erted to strings that are +ritten o t( >any operations s pport this implicit type con)ersion, and they rely on typecon)ersion ro tines( There are fo r types among +hich m t al con)ersion is s pported@ strings, csets, integers, and real n mbers( The details of type con)ersion are part of the Icon lang age definition 3"ris+old and "ris+old 1:G34( $or e.ample, +hen a cset is con)erted to a string, the characters of the res lting string are in le.ical order( *ome con)ersions are conditional and may s cceed or fail, depending on the )al e being con)erted( $or e.ample, a real n mber can be con)erted to an integer only if its )al e is in the range of a C long( The con)ersions are ill strated in the follo+ing diagram, +here dashed lines indicate con)ersions that are conditional@

1 !

Thus, of the twelve conversions, five are conditional. Some kinds of conversions are "natural" and occur frequently in typical programs. Examples are string-to-integer conversion and integer-to-string conversion. Other conversions, such as cset-to-integer, are unlikely to occur in the normal course of computation. To reduce the number of conversion routines required, these unlikely conversions are done in two steps. For example, the conversion of a cset to an integer is done by first converting the cset to a string and then converting the string to an integer. The direct conversions are

Conversions are done by calling routines that convert values to expected types. These routines are

cvcset    convert to cset
cvint     convert to integer
cvreal    convert to real
cvstr     convert to string

Since these routines may be called with any type of value, all of them are conditional. For example, it is not possible to convert a list to a string. These routines return the value CvtFail to indicate the failure of a conversion. If conversion is successful, they return a value indicating the type of conversion.

1 "

Numerical computation introduces complications in addition to the types integer and real, since there is the concept of a numeric "type" that includes both integers and real numbers. This is represented explicitly by the Icon type-conversion function numeric(x), which converts x to either integer or real, depending on the value of x. Numeric conversion occurs implicitly in polymorphic operations such as
n+m

which performs integer or real arithmetic, depending on the types of n and m. The routine for conversion of a value to numeric is
cvnum(dp, result) register struct descrip *dp; union numeric *result; { static char sbuf(MaxCvtLen]; if (Fual(/dp)) $ qtos(dp, sbuf)return ston(sbuf, result)% s)itc* (<+pe(/dp)) $ case <,Gnte6er: result78inte6er ! (lon6)GntHal(/dp)return <,Gnte6ercase <,4on6: result78inte6er ! 3lk4oc(/dp)78lon6int'intvalreturn <,4on6case <,.eal: =et.eal(dp, result78real)return <,.ealdefault: 2/ / <r+ to convert t*e value to a strin6 and / t*en tr+ to convert t*e strin6 to an inte6er' /2 if (cvstr(dp, sbuf) !! >vtBail) return >vtBailreturn ston(@tr4oc(/dp), result)% %

The macro GetReal is used to handle the real type, since some computers have restrictions on the alignment of doubles. Thus, GetReal has different definitions depending on the target computer. The actual conversion of a string to a numeric value is done by ston. Note that the conversion of a cset to a numeric value occurs in the default clause by way of conversion to a string. The conversion routine cvstr requires a buffer to construct the string. This buffer is provided by the routine that calls cvstr, as illustrated by the previous example. See Sec. 4.4.4. The code for cvstr is
cvstr(dp, sbuf) register struct descrip *dp; char *sbuf; { double rres; if (Qual(*dp)) { return NoCvt; /* It is already a string */ }

1 #

switch (Type(*dp)) { /* * For types that can be converted into strings, call the * appropriate conversion routine and return its result. * Note that the conversion routines change the descriptor * pointed to by dp. */ case T_lnteger: return itos((long)IntVal(*dp), dp, sbuf); case T_Long: return itos(BlkLoc(*dp)->longint.intval, dp, sbuf); case T_Real: GetReal(dp, rres); return rtos(rres, dp, sbuf); case T_Cset: return cstos(BlkLoc(*dp)->cset.bits, dp, sbuf); default: /* * The value cannot be converted to a string. */ return CvtFail; } }

If dp is a qualifier, cvstr returns NoCvt, a code that indicates no conversion was performed. If a conversion is required, itos, rtos, or cstos does the actual work, placing its result in sbuf and changing the descriptor pointed to by dp accordingly. These routines return the code Cvt, which is, in turn, returned by cvstr. The value returned by cvstr therefore signals whether no conversion was needed (NoCvt), or a conversion was performed and the string is in sbuf (Cvt), or the conversion failed (CvtFail). The reason that the return codes are needed is that if a converted string is in a buffer that is local to the calling routine, it must be copied into allocated storage. Otherwise it would be destroyed when that routine returns. A simple example of this situation occurs in the routine that implements the built-in function string(x):
FncDcl(string, 1) { char sbuf[MaxCvtLen]; extern char *alcstr(); Arg0 = Arg1; switch (cvstr(&Arg0, sbuf)) { /* * If Arg1 is not a string, allocate it and return it; * if it is a string, just return it; fail otherwise. */ case Cvt: strreq(StrLen(Arg0)); /* allocate converted string */ StrLoc(Arg0) = alcstr(StrLoc(Arg0), StrLen(Arg0)); case NoCvt: Return; default: Fail; } }

1!&

12.2 Dereferencing and Assignment


If there were no trapped variables, dereferencing and assignment would be trivial. For example, the descriptor d is dereferenced by
d = *VarLoc(d)

where VarLoc references the v-word of d:


#define VarLoc(d) ((d).vword.dptr)

The dereferencing or assignment to a trapped variable, on the other hand, may involve a complicated computation. This computation reflects the meaning associated with the operation on the source-language expression that is represented in the implementation as a trapped variable. For example, as discussed previously, in
x[y] := z

the value of x may be a list, a string, a table, or a record. A subscripted list or record does not produce a trapped variable, but the other two cases do. For a string, the variable on the left side of the assignment is a substring trapped variable. For a table, the variable is a table-element trapped variable. In the first case, the assignment involves the concatenation of three strings and the assignment of the result to x. In the second case, it involves looking for y in the table. If there is a table element for y, its assigned value is changed to the value of z. Otherwise, the table-element trapped-variable block is converted to a table-element block with the assigned value, and the block is inserted in the appropriate chain.

12.2.1 Dereferencing

Dereferencing of other trapped variables involves computations of comparable complexity. Dereferencing is done in the interpreter loop for arguments of operators for which variables are not needed. For example, in
n+m

the identifiers n and m are dereferenced before the function for addition is called (see Sec. 8.3.1). On the other hand, in
s[i]

the identifier i is dereferenced, but s is not, since the subscripting routine needs the variable as well as its value. The function invocation routine also dereferences variables before a function is called. Note that there is no function that requires an argument that is a variable. Suspension and return from procedures also dereference local identifiers and arguments. Dereferencing occurs in a number of other places. For example, the function that handles subscripting must dereference the subscripted variable to determine what kind of result to produce. The dereferencing routine begins as follows:
deref(dp) struct descrip *dp; { register word i, j; register union block *bp; struct descrip v, tbl, tref; char sbuf[MaxCvtLen]; extern char *alcstr(); if (!Qual(*dp) && Var(*dp)) { /* * dp points to a variable and must be dereferenced.

1!1

*/

If dp does not point to a variable descriptor, the remaining code is skipped and deref simply returns. If dp points to a variable that is not a trapped variable, dereferencing is simple:
if (!Tvar(*dp)) /* * An ordinary variable is being dereferenced; just * replace *dp with the descriptor *dp is pointing to. */ *dp = *VarLoc(*dp);

There are three types of trapped variables, with a switch on the type:
else switch (Type(*dp)) { case T_Tvsubs: /* * A substring trapped variable is being dereferenced. * Point bp to the trapped variable block and v to * the string. */ bp = TvarLoc(*dp); v = bp->tvsubs.ssvar; DeRef(v); if (IQual(v)) runerr(103, &v); if (bp->tvsubs.sspos + bp->tvsubs.sslen -1 > StrLen(v)) runerr(205, NULL); /* * Make a descriptor for the substring by getting the * length and pointing into the string. */ StrLen(*dp) = bp->tvsubs.sslen; StrLoc(*dp) = StrLoc(v) + bp->tvsubs.sspos -1; break;

The macro DeRef calls deref; a macro is used so that its definition can be changed to gather statistics on the use of dereferencing. A table-element trapped variable may point to a table-element trapped-variable block or to a table-element block. The second situation occurs if two table-element trapped variables point to the same table-element trapped-variable block and assignment to one of the variables converts the table-element trapped-variable block to a table-element block before the second variable is processed. See Sec. 7.2. In this case, the value of the trapped variable is in the table-element block. On the other hand, if the trapped variable points to a table-element trapped-variable block, it is necessary to look up the subscripting value in the table, since an assignment for it may have been made between the time the trapped variable was created and the time it was dereferenced. If it is in the table, the corresponding assigned value is returned. If it is not in the table, the default assigned value is returned. The code is
case T_Tvtbl: if (BlkLoc(*dp)->tvtbl.title == T_Telem) { /* * The tvtbl has been converted to a telem and is * in the table. Replace the descriptor pointed to * by dp with the value of the element.

1!2

*/ *dp = BlkLoc(*dp)->telem.tval; break; } /* * Point tbl to the table header block, tref to the * subscripting value, and bp to the appropriate * chain. Point dp to a descriptor for the default * value in case the value referenced by the subscript * is not in the table. */ tbl = BlkLoc(*dp)->tvtbl.clink; tref = BlkLoc(*dp)->tvtbl.tref; i = BlkLoc(*dp)->tvtbl.hashnum; *dp = BlkLoc(tbl)->table.defvalue; bp=BlkLoc(BlkLoc(tbl)->table.buckets[SlotNum(i,TSlots)]; /* * Traverse the element chain looking for the subscript * value. If found, replace the descriptor pointed to * by dp with the value of the element. */ while (bp != NULL && bp->telem.hashnum <= i) ( if ((bp->telem.hashnum == i) && (equiv(&bp->telem.tref, &tref))) { *dp = bp->telem.tval; break; } bp = BlkLoc(bp->telem.clink); } break;

The macro SlotNum(i,j) produces the slot number from the hash number, given j slots. The last case, keyword trapped variables, is simpler, since the value is contained in the block pointed to by the trapped variable:
case <,<vk+)d: bp ! <var4oc(/dp)- /dp ! bp78tvk+)d'k+val- break-

12.2.2 Assignment

The values of global identifiers are established initially as a byproduct of reading the icode file into the icode region. When procedures are called, the values of arguments and local identifiers are on the interpreter stack. These operations associate values with variables, but assignment, unlike dereferencing, is explicit in the source program. The routine doasgn is used to perform all such operations. For example, the function for
x := y

is
OpDcl(asgn, 2, ":=") { /* * Make sure that Arg1 is a variable. */ if (Qual(Arg1) || !Var(Arg1)) runerr(111, &Arg1); /* * The returned result is the variable to which assignment is

1!3

/ bein6 made' */ Arg0 = Arg1; /* * All the work is done by doasgn. Note that Arg1 is known * to be a variable. */ if (!doasgn(&Arg1, &Arg2)) Fail; Return; }

Note that assignment may fail. This can occur as the result of an out-of-range assignment to &pos and is indicated by a returned value of 0 from doasgn. Like dereferencing, assignment is trivial for variables that are not trapped. The routine doasgn begins as follows:
doasgn(dp1, dp2) struct descrip *dp1, *dp2; { register word i1, i2; register union block *bp; register struct b_table *tp; int (*putf)(); union block *hook; char sbuf1 [MaxCvtLen], sbuf2[MaxCvtLen]; extern struct descrip tended[]; extern struct b_lelem *alclstb(); extern char *alcstr(); tended[1] = *dp1; tended[2] = *dp2; ntended = 2; assign: if (!Tvar(tended[1])) *VarLoc(tended [1]) = tended [2]; else switch (Type(tended[1])) {

An array of descriptors in the basis is used, since garbage collection may occur at various places in doasgn, and it is important to assure that the descriptors pointed to by dp1 and dp2 are processed properly by the garbage collector. The value of the global variable ntended specifies how many tended descriptors are in use at any time. As for dereferencing, there are three types of trapped variables to be considered. Assignment to a substring trapped variable is rather complicated:
case T_Tvsubs: /* * An assignment is being made to a * variable. The tended descriptors * follows: * * tended[1] -the substring trapped * tended[2] -the value to assign * tended[3] -the string containing * tended[4] -the substring * tended[5] -the result string */

substring trapped are used as

variable the substring

1!4

/* * Be sure that the value to be assigned is a string. */ ntended = 5; DeRef(tended [2]); if (cvstr(&tended[2], sbuf1) == CvtFail) runerr(103, &tended[2]); /* * Be sure that the variable in the trapped variable * points to a string. */ tended[3] = BlkLoc(tended [1 ])->tvsubs.ssvar; DeRef(tended [3]); if (!Qual(tended[3])) runerr(103, &tended[3]); strreq(StrLen(tended[3]) + StrLen(tended[2])); /* * Get a pointer to the substring trapped-variable block * and make i1 a C-style index to the character that * begins the substring. */ bp = BlkLoc(tended[1]); i1 = bp->tvsubs.sspos -1; /* * Make tended[4] a descriptor for the substring. */ @tr4en(tended[;]) ! bp78tvsubs'sslen ; @tr4oc(tended[;]) ! @tr4oc(tended[ ]) 0 i1/* * Make i2 a C-style index to the character after the * substring. If i2 is greater than the length of the * substring, it is an error because the string being * assigned will not fit. */ i2 = i1 + StrLen(tended[4]); if (i2 > StrLen(tended[3])) runerr(205, NULL); /* * Form the result string. First, copy the portion of * the substring string to the left of the substring * into the string space. */ StrLoc(tended[5]) = alcstr(StrLoc(tended[3]), i1); /* * Copy the string to be assigned into the string space * effectively concatenating it. */ alcstr(StrLoc(tended [2]), StrLen(tended [2]); /* * Copy the portion of the substring to the right of * the substring into the string space, completing the * result. */ alcstr(StrLoc(tended[3]) + i2, StrLen(tended[3]) -i2); /* * Calculate the length of the new string.

1!5

*/ StrLen(tended[5]) = StrLen(tended[3]) StrLen(tended[4]) + StrLen(tended [2]); bp->tvsubs.sslen = StrLen(tended [2]); tended[1] = bp->tvsubs.ssvar; tended[2] = tended[5]; /* * Everything is set up for the actual assignment. Go * back to the beginning of the routine to do it. */ goto assign;

At the end of this case, no assignment has been made yet. Instead, tended[1] and tended[2] contain, respectively, the variable to which assignment is to be made and the value to be assigned. This is the same situation that exists at the beginning of the routine. The actual assignment is made by transferring back to the beginning. Note that, at this point, tended[1] may contain a trapped variable. Table-element trapped variables have the same possibilities for assignment as for dereferencing. The processing is more complicated, since it may be necessary to convert a table-element trapped-variable block into a table-element block and link it into a chain:
case T_Tvtbl: /* * * The tended descriptors are used as follows: * * tended[1] -the table element trapped variable * tended[2] -the value to be assigned * tended[3] -subscripting value */ /* * Point bp to the trapped-variable block, point * tended[3] to the subscripting value, and point tp to * the table-header block. */ ntended = 3; bp = BlkLoc(tended[1]); if (bp->tvtbl.title == T_Telem) { /* * The trapped-variable block already has been * converted to a table-element block. Just assign * to it and return. */ bp->telem.tval = tended[2]; ntended = 0; return 1; } tended[3] = bp->tvtbl.tref; tp = (struct b_table *)BlkLoc(bp->tvtbl.clink); /* * Get the hash number for the subscripting value and * locate the chain that contains the element to which * assignment is to be made. */ i1 = bp->tvtbl.hashnum; i2 = SlotNum(i1, TSlots);

1!

bp = BlkLoc(tp->buckets[i2]); /* * Traverse the chain to see if the value is already in * the table. If it is there, assign to it and return. */ hook = bp; while (bp != NULL && bp->telem.hashnum <= i1) { if (bp->telem.hashnum == i1 && equiv(&bp->telem.tref, &tended[3])) { bp->telem.tval = tended[2]; ntended = 0; return 1; } hook = bp; bp = BlkLoc(bp->telem.clink); } /* * The value being assigned is new. Increment the table * size, convert the table-element trapped-variable * block to a table-element block, and link it into the * chain. */ G 78si9e00if (hook == bp) { /* it goes at front of chain */ .= BlkLoc(tended[1]); bp->telem.clink = tp->buckets[i2]; BlkLoc(tp->buckets[i2]) = bp; tp->buckets[i2].dword = D_Telem; } else { /* it follows hook */ bp = BlkLoc(tended[1]); bp->telem.clink = hook->telem.clink; BlkLoc(hook->telem.clink) = bp; hook->telem.clink.dword = D_Telem; } bp->Mbl.title = T_Telem; bp->telem.tval = tended [2]; ntended = 0; return 1;

In the case of a keyword trapped variable, the assignment routine that is pointed to from the keyword trapped-variable block is called to perform the assignment:
case T_Tvkywd: ntended = 2; putf = BlkLoc(tended[1])->tvkywd.putval; if ((*putf)(&tended[2]) == NULL) { ntended = 0; return 0; /* assignment fails */ } ntended = 0; return 1;

The assignment routine for &subject is typical:


putsub(dp) struct descrip *dp; { char sbuf[MaxCvtLen]; extern char *alcstr();

1!!

switch (cvstr(dp, sbuf)) { case Gvt: strreq(StrLen( *dp)); StrLoc(*dp) = alcstr(StrLoc(*dp), StrLen(*dp)); case NoGvt: k_subject = *dp; k_pos = 1; break; default: runerr(103, dp); } return 1; }

12.3 Input and Output


Icon supports only sequential file access. The run-time system uses C library routines to perform input and output, so the main implementation issues are those that relate to interfacing these routines.

12.3.1 Files

A value of type file in Icon points to a block that contains the usual title word, a FILE * reference to the file, a status word, and the string name of the file. The file status values are

0     closed
1     open for reading
2     open for writing
4     open to create
8     open to append
16    open as a pipe

These decimal numbers correspond to bits in the status word. For example, the value of &input is

while the value of &output is

1!"

Another example is
out := open("log", "a")

for which the value of out is

Note that the file status is 10, corresponding to being open for writing and appending. Closing a file, as in
close(out)

merely changes its file status:

12.3.2 Reading and Writing Data

The function read(f) reads a line from the file f. In UNIX, a line is just a string of characters up to a newline character. There is no limit on the length of a line and the length of a line cannot be determined before it is read. On the other hand, there must be a place to store the line. Consequently, a limit is placed on the length of a line that can be read by read(f). This limit is an implementation parameter, which is usually 2,048 characters. Characters are read into a buffer until a newline character is encountered or the limit is reached. A predictive need request is then made to assure that there is enough space in the allocated string region for the string, and the string is copied from the buffer into the string region. The function reads(f, i) reads i characters from the file f. These characters may include newline characters. There is no limit on i, since the maximum length of the string to be read is known before characters are read. A predictive need request can be made to assure that there is enough space in the allocated string region. Characters are then read directly into the allocated string region without the use of an intervening buffer. When strings are written, they are written directly from the allocated string region. There is no need to perform any allocation or to use an intermediate buffer. Several strings can be concatenated on a file by
write(s1 , s2, ..., sn)

This avoids the internal allocation and concatenation that is required for

1!#

write(s1 || s2 || ...|| sn)

12.4 Diagnostic Facilities


Icon's diagnostic facilities consist of

• The function image(x), which produces a string representation of the value of x.
• The function display(f, i), which writes the names and values of identifiers in at most i levels of procedure call to the file f.
• Tracing of procedure calls, returns, resumptions, and suspensions.
• Run-time error termination messages.

Procedure tracing is done in invoke, pret, pfail, and psusp. If the value of &trace is nonzero, it is decremented and an appropriate trace message is written to standard error output. See Sec. 2.1.12 for an example. The function display(f, i) must locate the names and values of local identifiers and arguments. The names are in the procedure block for the current procedure, which is pointed to by the zeroth argument of the current procedure call. The values are on the interpreter stack as described in Sec. 10.3.3. Run-time termination messages are produced by the C routine runerr(n, dp), where dp is a pointer to the descriptor for the offending value. The value NULL is used for dp in cases where there is no offending value to print. In all of these diagnostic situations, string representations of values are needed. The string representation for the "scalar" types string, cset, integer, and real is similar to what it is in the text of a source-language program. Long strings and csets are truncated to provide output that is easy to read. Other types present a variety of problems. For procedures, the type and procedure name are given. A list, on the other hand, may be arbitrarily large and may contain values of any type, even lists. While the name may suffice for a procedure, often more information about a list is needed. As a compromise between information content and readability, only the first three and last three elements of a long list are included in its string representation. Since lists and other nonscalar types may be elements of lists, their representation as elements of a list is more restricted, with only the type and size being shown. Since trace, display, and error output are written to files, the string representations can be written as they are determined, without regard for how long they are. The function image(x), on the other hand, returns a string value, and space must be allocated for it. A more limited form of string representation is used for nonscalar values, since the space needed might otherwise be very large.

EXERCISES

12.1 It is possible to conceive of meaningful ways to convert any type of data in Icon to any other. For example, a procedure might be converted to a string that consists of the procedure declaration. How would such a general conversion feature affect the way that types are converted in the run-time system?

1"&

12.2 On computers with 16-bit words, Icon has two representations for integers internally (see Sec. 4.1.3). Describe how this complicates type conversion.
12.3 How would the addition of a new numeric type, such as complex numbers, affect type conversion?
12.4 How big would MaxCvtLen be if Icon had 512 different characters? 128? 64?
12.5 Suppose a large-integer type were added to Icon to allow arithmetic on integers of an arbitrarily large size.
• How would this affect the numerical conversion routines?
• How would this affect cvstr and the routines that call it?
12.6 List all the source-language operations that perform assignment.
12.7 The routine doasgn returns the value 0 if assignment cannot be made. This signal results in a failure return in the functions that call doasgn (see Sec. 12.2.2). Why not use Fail in doasgn?
12.8 Assuming that x, y, z, and w all have string values, diagram the structures that are produced in the course of evaluating the following expressions:
x[y] := z
z := x[y]
x[y] := z[w]
x[y][z] := w
Repeat this exercise for the case where all the identifiers have tables as values.
12.9 Give an expression in which a table-element trapped variable points to a table-element block rather than to a table-element trapped-variable block.
12.10 Give an expression in which a table-element trapped variable points to a table-element trapped-variable block, but where there is a table-element block in the table with the same entry value.
12.11 Why are tended descriptors needed in doasgn but not in deref?
12.12 Show an expression in which, at the end of the case for assignment to a substring trapped variable, the variable to which the assignment is to be made is a trapped variable. Can such a trapped variable be of any of the three types?
12.13 Why is the string produced by read(f) not read directly into the allocated string region?
12.14 Are there any circumstances in which write(x1, x2, ..., xn) requires the allocation of storage?
12.15 Identify all the portions of blocks for source-language values that are necessary only for diagnostic output. How significant is the amount of space involved?
12.16 The use of trapped variables for keywords that require special processing for assignment suggests that a similar technique might be used for substring and table-element trapped variables. Evaluate this possibility.

1"1

Part II: An Optimizing Compiler for Icon


by Kenneth W. Walker

1"2

Preface to Part II
There are many optimizations that can be applied while translating Icon programs. These optimizations and the analyses needed to apply them are of interest for two reasons. First, Icon's unique combination of characteristics requires developing new techniques for implementing them. Second, these optimizations are useful in a variety of languages and Icon can be used as a medium for extending the state of the art. Many of these optimizations require detailed control of the generated code. Previous production implementations of the Icon programming language have been interpreters. The virtual machine code of an interpreter is seldom flexible enough to accommodate these optimizations, and modifying the virtual machine to add the flexibility destroys the simplicity that justified using an interpreter in the first place. These optimizations can only reasonably be implemented in a compiler. In order to explore these optimizations for Icon programs, a compiler was developed. This dissertation describes the compiler and the optimizations it employs. It also describes a run-time system designed to support the analyses and optimizations. Icon variables are untyped. The compiler contains a type inferencing system that determines what values variables and expressions may take on during program execution. This system is effective in the presence of values with pointer semantics and of assignments to components of data structures. The compiler stores intermediate results in temporary variables rather than on a stack. A simple and efficient algorithm was developed for determining the lifetimes of intermediate results in the presence of goal-directed evaluation. This allows an efficient allocation of temporary variables to intermediate results. The compiler uses information from type inferencing and liveness analysis to simplify generated code. Performance measurements on a variety of Icon programs show these optimizations to be effective. The optimizing compiler for Icon was developed by Ken Walker as part of his Ph.D. research, and this part of the Icon/Unicon Compendium is essentially a reprint of his dissertation, which also appeared as University of Arizona CS TR 91-16. Along with his consent, Ken kindly provided the original groff sources to his dissertation. Any typographical and formatting errors that remain are the fault of the editor.

1"3

Chapter 13: The Optimizing Compiler


Iconc is a practical and complete optimizing compiler for a unique and complex programming language. Part II describes the theory behind several parts of the compiler and describes the implementation of all interesting aspects of the compiler.

13.1 Motivation
The motivation for developing a compiler for the Icon programming language is to have a vehicle for exploring optimization techniques. Some performance improvements can be obtained by modifying the run-time system for the language, for example by implementing alternative data structures or storage management techniques. These improvements may apply to a broad class of programs and the techniques can reasonably be implemented in an interpreter system. However, other techniques, such as eliminating unnecessary type checking, apply to expressions within specific programs. The Icon interpreter described in Part I is based on a virtual machine with a relatively small instruction set of powerful operations. A small instruction set is easier to implement and maintain than a large one, and the power of many of the individual operations ensures that the overhead of the decoding loop is not excessive. The disadvantage of this instruction set is that an Icon translator that generates code for the interpreter does not have enough flexibility to do many of the possible program-specific optimizations. It is possible to devise a set of more primitive virtual machine instructions that expose more opportunities for these optimizations. Increasingly primitive instruction sets provide increasingly more opportunities for optimizations. In the extreme, the instruction set for a computer (hardware interpreter) can be used and the translator becomes a compiler. A compiler was chosen for this research because it is a good vehicle for exploring program-specific optimizations and eliminates the overhead of a software interpreter, which might otherwise become excessive.

13.2 Type Inferencing


Most Icon operations require operands with specific types. The types of the actual operands in an expression must be checked and possibly converted to the required types. However, Icon variables are untyped; in general, this checking cannot be done at translation time. The Icon interpreter takes the simple approach to the problem and performs all of the type checking for an expression every time it is executed. For most programs, a type inferencing system can provide the information needed to do much of the checking at translation time, eliminating the need for these checks at run time. A type inferencing system determines the types that elements of a program (variables, expressions, procedures, etc.) can take on at run time. The Icon compiler contains an effective and practical type inferencing system, and implements code generation optimizations that make use of the information produced by the type inferencing system. Two basic approaches have been taken when developing type inferencing schemes. Schemes based on unification [.Milner,smltlk type,unify.] construct type signatures for procedures; schemes based on global data flow analysis [.typinfer, typrcsv, flwanal, progflw.] propagate throughout a program the types variables may take on. One strength of the unification approach is that it is effective at handling polymorphous procedures.

1"4

* ch schemes ha)e properties that ma0e them effecti)e in implementing fle.ible compiletime type systems( > ch of the research on them foc ses on this fact( The primary p rpose of the type inferencing system for the Icon compiler is to eliminate most of the r n-time type chec0ing rather than to report on type inconsistencies at compile time, so these properties ha)e little impact on the choice of schemes sed in the compiler( Type inferencing systems based on nification ha)e a significant +ea0ness( Proced re typesignat res do not describe side effects to global )ariables( Type inferencing schemes based on nification m st ma0e cr de ass mptions abo t the types of these )ariables( *chemes based on global data flo+ analysis handle global )ariables effecti)ely( >any Icon programs ma0e significant se of global )ariables, this is a strong arg ment in fa)or of sing this 0ind of type inferencing scheme for Icon( These schemes do a poor 5ob of inferring types in the presence of polymorpho s proced res( It is generally too e.pensi)e for them to comp te the res lt type of a call in terms of the arg ment types of that specific call, so res lt types are comp ted based on the aggregate types from all calls( Poor type information only res lts if polymorphism is act ally e.ploited +ithin a program( The primary se of polymorpho s proced res is to implement abstract data types( Icon, on the other hand, has a rich set of b ilt-in data types( 9hile Icon programs ma0e hea)y se of these b ilt-in data types and of IconJs polymorpho s b ilt-in operations, they seldom ma0e se of ser-+ritten polymorpho s proced res( 9hile a type inferencing scheme based on global data flo+ analysis is not effecti)e in inferring the precise beha)ior of polymorpho s proced res, it is effecti)e in tili7ing the predetermined beha)ior of b ilt-in polymorpho s operations( These facts combined +ith the obser)ation that Icon programs often ma0e se of global )ariables indicate that global data flo+ analysis is the 
approach of choice for type inferencing in the Icon compiler. Icon has several types of non-applicative data structures with pointer semantics. They all can be heterogeneous and can be combined to form arbitrary graphs. An effective type inferencing system must handle these data structures without losing too much information through crude assumptions. These composite data structures typically consist of a few basic elements used repeatedly and they logically have a recursive structure. A number of type inferencing systems handle recursion in applicative data structures [.analrcsv,prlgtyp,typrcsv.]; the system described here handles Icon data types that have pointer semantics and handles destructive assignment to components of data structures. Analyses have been developed to handle pointer semantics for problems such as allocation optimizations and determining pointer aliasing to improve other analyses. However, most of these analyses lose too much information on heterogeneous structures of unbounded depth (such as the mutually referencing syntax trees and symbol tables commonly found in a translator) to be effective type inferencing systems [.progflw,depptr.]. Work by Chase, Wegman, and Zadeck [.pntstr.] published subsequent to the original technical report on the Icon type inferencing system [.tr88-25.] presents a technique similar to the one used in this type inferencing system. They use a minimal language model to describe the use of the technique for pointer analysis. They speculate that the technique might be too slow for practical use and propose methods of improving the technique in the context of pointer analysis. Use of the prototype Icon type inferencing system described in the original technical report indicates that memory usage is more of a problem than execution time. This problem is addressed in the implementation of type inferencing in the Icon compiler.

1"5

13.3 Liveness Analysis


Type checking optimizations can be viewed as forms of argument handling optimizations. Other argument handling optimizations are possible. For example, when it is safe to do so, it is more efficient to pass a variable argument by reference than to copy it to a separate location and pass a reference to that location (this particular opportunity for optimization arises because of implementation techniques borrowed from the Icon interpreter -- Icon values are larger than pointers and Icon parameter passing is built on top of C parameter passing). Such optimizations are not possible in a stack-based execution model; a temporary-variable model is needed and such a model is used by the Icon compiler. Icon's goal-directed evaluation can extend the lifetime of the intermediate values stored in temporary variables. Icon presents a unique problem in liveness analysis, which is the static determination of the lifetime of values in a program [ASU86, progflw.]. While this problem, like other liveness problems, can be solved with traditional techniques, it has enough structure that it can be solved without precomputing a flow graph or using expensive forms of data flow analysis. The only previous implementation of Icon using a temporary-variable model is a partial implementation by Christopher [.tccompile.]. Christopher uses the fact that Icon programs contain many instances of bounded goal-directed evaluation to deduce limits for the lifetimes of intermediate values. However, this approach produces a very crude estimate for these lifetimes. While overestimating the lifetime of intermediate values results in a safe allocation of temporary variables to these values, a fine-grained liveness analysis results in the use of fewer temporary variables. The Icon compiler addresses this problem of fine-grained liveness analysis in the presence of goal-directed evaluation and addresses the problem of applying the information to temporary variable allocation.

13.4 Analyzing Goal-Directed Evaluation


Many kinds of analyses of Icon programs must deal with Icon's goal-directed evaluation and its unique control structures. These analyses include type inferencing, liveness analysis, and the control flow analyses in O'Bagy's prototype compiler [.tr88-31.]. Determining possible execution paths through an Icon program is more complicated than it is for programs written in more conventional languages. The implementation of the type inferencing system and liveness analysis here explores variations on the techniques presented by O'Bagy.

The Organization of Part II

Part II is logically divided into three subparts. Chapters 14 through 16 present the main ideas upon which the compiler is based, Chapters 17 through 22 describe the implementation of these ideas, and Chapter 23 presents performance measurements of compiled code. Chapter 14 describes the code generated by the compiler. It explains how Icon data values, variables, and goal-directed evaluation are implemented, independent of the actual translation process. Chapter 15 presents a theoretical model of the type inferencing system used in the compiler. The model includes the important ideas of the type inferencing system, while ignoring some purely pragmatic details. Chapter 16 explains the liveness analysis problem and presents the solution used in the compiler.

1"

The Icon compiler is designed to be a production-quality system. The compiler system consists of the compiler itself and a run-time system. The fact that these two components are not entirely independent must be carefully considered in the design of such a production-quality system. Chapter 17 describes the system as a whole and how the interactions between the components are handled. Chapter 18 presents the organization of the compiler itself. This chapter describes some parts of the compiler in detail, but defers major topics to other chapters. Chapter 19 builds on the model presented in Chapter 15 and describes the full type inferencing system used in the compiler and its implementation. Chapter 20 describes the translation techniques used to produce code from expressions that employ Icon's goal-directed evaluation scheme and its unique control structures. It also describes the allocation of temporary variables using the information produced by liveness analysis. The code generator does no look-ahead and as a result it often produces code that is poor when taken in context of subsequent code. This problem is shared with most code generators as are some of the solutions used in this compiler. The unique code generation techniques required by Icon's goal-directed evaluation produce unusual variations of this problem and require some innovative solutions in addition to the standard ones. Chapter 21 describes the various techniques employed to handle this problem. Chapter 22 describes the optimizations that can be done using the results of type inferencing. These optimizations also make use of liveness information. Chapter 23 demonstrates the effects of the various optimizations used in the compiler on the performance of specific kinds of expressions. It also presents measurements of the performance of compiled code for a variety of complete programs, comparing the performance to that of the Icon interpreter. In addition, the sizes of the executable code for the complete 
programs are presented. The conclusions, Chapter 24, summarize what has been done and list some work that remains to be explored. Chapter 25 describes one successful project to improve the compiler and make it usable on larger programs.

1"!

Chapter 14: The Translation Model


Modern compilers seldom produce machine code directly. They translate a program into a form closer to machine code than the source language and depend on other tools to finish the translation. If the compiler produces an object module, it depends on a linker and a loader to produce executable code. If the compiler produces assembly language, it also depends on an assembler. A recent trend among compilers produced in research environments has been to produce C code [.cbook,ansi-c.], adding a C compiler to the list of tools required to finish the translation to machine code [.SR, Ramakrishnan, Bartlett 89, Yuasa,Stroustrup,yacc,lex.]. The Icon compiler takes this approach and generates C code. There are several advantages to compiling a language into C. Low-level problems such as register allocation and the selection and optimization of machine instructions are handled by the C compiler. As long as these problems are outside the scope of the research addressed by the compiler, it is both reasonable and effective to allow another compiler to deal with them. In general, it is easier to generate code in a higher-level language, just as it is easier to program in a higher-level language. As long as the target language lies on a "nearly direct path" from the source language to machine code, this works well. C is closely matched to most modern machine architectures, so few tangential translations must be done in generating C code from Icon. Another advantage of generating C code is that it greatly increases the portability of the compiler and facilitates cross-compilation. The popularity of C in recent years has resulted in production-quality C compilers for most systems. While the implementation of Icon in C contains some machine and system dependencies, C's conditional compilation, macro, and file inclusion facilities make these dependencies relatively easy to deal with when they arise. These facts make possible the development of a highly portable Icon compiler, 
allowing the compiler's effectiveness to be tested by Icon's large user community.

14.1 Data Representation


Because the target language is C, Icon data must be represented as C data. The careful representation of data and variables is important to the performance of an implementation of a high-level language such as Icon. In addition, information provided by type inferencing can be used to optimize these representations. However, such considerations are largely outside the scope of this current research. For this reason, the representations used in code produced by this compiler and the compiler's run-time system are largely unchanged from those of the Icon interpreter system described in Part I. The interpreter's run-time system is written in C. Therefore borrowing its data representations for the compiler system is simple. This choice of representation means that the run-time system for the compiler could be adapted directly from the run-time system for the interpreter, and it allowed the compiler development to concentrate on parts of the system addressed by this research. In addition, this choice of representation allows a meaningful comparison of the performance of compiled code to the performance of interpreted code. An Icon value is represented by a two-word descriptor (see Section 4.1). The first word, the d-word, contains type information. In the case of a string value, the type is indicated by zero in a high-order bit in the d-word, and the length of a string is stored in low-order bits of the d-word. All other types have a one in that bit and further type information elsewhere in the d-word. The v-word of a descriptor indicates the value. The v-word of the null value is zero, the v-word of an Icon integer is the corresponding C integer value, and v-words of

1""

other types are pointers to data. A descriptor is implemented with the following C structure:
struct descrip {
   word dword;              /* type field */
   union {
      word integr;          /* integer value */
      char *sptr;           /* pointer to character string */
      union block *bptr;    /* pointer to a block */
      dptr descptr;         /* pointer to a descriptor */
      } vword;
   };

word is defined to be a C integer type (one that is at least 32 bits long), block is a union of structures implementing various data types, and dptr is a pointer to a descrip structure.

14.2 Intermediate Results


While the representation of data in the compiler is the same as in the interpreter, the method of storing the intermediate results of expression evaluation is not. Two basic approaches have been used in language implementations to store intermediate results. A stack-based approach is simple and dynamic. It requires no pre-analysis of expressions to allocate storage for the intermediate results, but the simple rigid protocol allows little room for optimization. For Icon there is an additional problem with a stack-based approach. Goal-directed evaluation extends the lifetime of some intermediate results, requiring that the top elements of the evaluation stack be copied at critical points in execution [see Part I, or UA tr88-31]. In spite of the need for this extra copying, most previous implementations of Icon have been implemented with an evaluation stack. An alternative to using a stack is to pre-allocate a temporary variable for each intermediate result. In this model, operations take explicit locations as arguments. Therefore an operation can directly access program variables as arguments; there is no need to perform the extra operations of pushing addresses or values on a stack. In addition, the lifetime of a temporary variable is not determined by a rigid protocol. The compiler can assign an intermediate result to a temporary variable over an arbitrary portion of the program, eliminating the copying needed to preserve a value beyond the lifetime imposed by a stack-based approach. This compiler uses the temporary-variable model because it allows more opportunities to optimize parameter handling, a major goal of this research. Icon's automatic storage management dictates the use of a garbage collector in the run-time system. When this garbage collector is invoked, it must be able to locate all values that may be used later in the program. In the interpreter system, intermediate values and local variables are stored on the same stack. The garbage collector sweeps this 
stack to locate values. In the compiler, a different approach is taken to ensure that all necessary values are locatable. Arrays of descriptors are allocated contiguously along with a count of the number of descriptors in the array. The arrays are chained together. An array of descriptors may be local to a C function, or it may be allocated with the malloc library function. The garbage collector locates values by following the chain and scanning the descriptors in each array. These descriptors are referred to as tended descriptors.

14.3 Executable Code


Even more important than where intermediate results are stored is how they are computed. Some aspects of Icon expression evaluation are similar to those of many other languages,

1"#

but other aspects are not. Goal-directed evaluation with backtracking poses a particular challenge when implementing Icon expression evaluation. The Icon interpreter is based on a virtual machine that includes backtracking, as are Prolog interpreters based on the Warren Abstract Machine [.wam.]. While details differ between the Icon and Prolog virtual machines, their implementation of control backtracking is based on the same abstract data structures and state variables. Such a virtual machine contains a stack of procedure frames, but the stack is maintained differently from that of a virtual machine that does not implement goal-directed evaluation. The difference manifests itself when a procedure produces a result, but has alternate results that it can produce in the event of backtracking. When this occurs, the frame for the procedure remains on the stack after control returns to the caller of the procedure. This frame contains the information needed to produce the alternate results. The left stack in the following diagram shows that procedure f has called procedure g. The arrows on the left of the stack represent the backtracking chain of procedures that can produce alternate results. btp points to the head of the backtracking chain which currently starts further down in the stack. The arrows on the right represent the call chain of procedures. fp points to the frame of the currently executing procedure.

Suppose g produces the first of several possible results. Execution returns to f and g's frame is added to the backtracking chain. This is represented by the middle stack in the diagram. If f then calls h, its procedure frame is added to the top of the stack as shown in the right stack in the diagram. If h produces a result and is not capable of producing more, execution returns to f and the stack again looks like the one in the middle of the diagram (the program pointer within f is different, of course). If h produces a result and is capable of producing more, execution returns to f, but h's frame remains on the stack and is added to the head of the backtracking chain, similar to what was done when g produced a result. If h produces no results, backtracking occurs. h's frame is removed from the stack, execution returns to the procedure g whose frame is at the head of the backtracking chain, and g's frame is removed from the head of the chain. The stack once again looks like the left stack in the diagram and g proceeds to produce another result. Traditional languages such as Pascal or C present high-level virtual machines that contain no notion of backtracking and have no need to perform low-level stack manipulations. Icon expressions with goal-directed evaluation cannot be translated directly into such languages. This is the fundamental problem that must be addressed when designing a compiler for Icon. O'Bagy presents an elegant solution to this problem in her dissertation [.tr88-31.]. Her solution is used by this optimizing compiler as a basis for translating Icon expressions into C code. The rest of this section contains a brief explanation of the variation of her approach that is used in the compiler, while exploring useful ways of

1#&

viewing the problem. O'Bagy's dissertation describes how control structures not covered in this discussion can be implemented using her model. Formal semantics is one tool that can be used in understanding a language [.gordon denote,stoy.]. The added complexity caused by Icon's goal-directed evaluation is reflected in Gudeman's description of Icon using denotational semantics [.gudeman denotational.]. While conventional programming languages can be described using one continuation for each expression, Icon requires two continuations. One continuation for an expression embodies the rest of the program if the expression succeeds, while the other embodies the rest of the program if the expression fails. The Icon compiler uses the notion of success continuations to implement goal-directed evaluation. However, these continuations violate some of the properties traditionally associated with continuations. A continuation in denotational semantics and in the language Scheme [.Abelson,[Rees 86].] is a function that never returns. However, the success continuations produced by the compiler implement backtracking by returning. In addition, these continuations implement the rest of the current bounded expression rather than the rest of the entire program. Note that unlike continuations in Scheme, these continuations are created at compile time, not at run time. Some Prolog compilers have been based on a similar continuation-passing technique [.Nilsson,Ramakrishnan.]. The C language is oriented toward an imperative style of programming. In order to produce efficient code, the Icon compiler should not generate an excessive number of function calls. Specifically, it should avoid creating continuations for every expression. A more operational view of Icon's semantics and of C's semantics can be useful in understanding how to accomplish this. An operation in Icon can succeed or fail. In the view of denotational semantics, the question of what will be done in each case must be answered, with 
the answers taking the form of functions. In an operational view, the questions can take the form of where to go in each case. The answers to these questions can be any type of transfer of control supported by the C language: execute the next sequential instruction, execute a function, return from a function, or go to a label. Most operations in Icon are monogenic. That is, they produce exactly one result, like operations in conventional languages. For these operations, the compiler can generate code whose execution simply falls through into the code that implements the subsequent operation. Conditional operations are more interesting. These operations either produce a single value or fail. If such an operation succeeds, execution can fall through into code implementing the subsequent operation. However, if the operation fails, execution must transfer elsewhere in the program. This is accomplished by branching to a failure label. If the code for the operation is put in-line, this is straightforward. However, if the operation (either a built-in operation or an Icon procedure) is implemented by a separate C function, the function must notify the caller whether it succeeded or failed and the caller must effect the appropriate transfer of control. By convention, C functions produced by the compiler and those implementing the run-time routines each return a signal (this convention is violated in some special cases). A signal is an integer (and is unrelated to Unix signals). If one of these C functions needs to return an Icon value, it does so through a pointer to a result location that is passed to it as an argument. Two standard signals are represented by the manifest constants A_Continue and A_Resume. A return (either an Icon return expression or the equivalent construct in a built-in operation) is implemented with code similar to

1#1

*result = operation result; return A_Continue;

Failure is implemented with the code


return A_Resume;

The code implementing the call of an operation consists of both a C call and signal-handling code.
switch (operation(args, &result)) { case A_Continue: break; case A_Resume: goto failure label; }

This code clearly can be simplified. This form is general enough to handle the more complex signal handling that can arise during code generation. Simplifying signal handling code is described in Chapter 21. Generators pose the real challenge in implementing Icon. A generator includes code that must be executed if subsequent failure occurs. In addition, a generator, in general, needs to retain state information between suspending and being resumed. As mentioned above, this is accomplished by calling a success continuation. The success continuation contains subsequent operations. If an operation in the continuation fails, an A_Resume signal is returned to the generator, which then executes the appropriate code. The generator retains state information in local variables. If the generator is implemented as a C function, a pointer to the continuation is passed to it. Therefore, a function implementing a generative operation need not know its success continuation until run time. Consider the operation i to j. This operation can be implemented in Icon with a procedure like
procedure To(i, j)
   while i <= j do {
      suspend i
      i +:= 1
      }
   fail
end

It can be implemented by an analogous C function similar to the following (for simplicity, C ints are used here instead of Icon values).
int to(i, j, result, succ_cont)
int i, j;
int *result;
int (*succ_cont)();
{
   int signal;

   while (i <= j) {
      *result = i;
      signal = (*succ_cont)();
      if (signal != A_Resume)
         return signal;
      ++i;
      }
   return A_Resume;
}

1#2

There is no explicit failure label in this code, but it is possible to view the code as if an implicit failure label occurs before the ++i. The Icon expression
every write(1 to 3)

can be compiled into the following code (for simplicity, the write function has been translated into printf and scoping issues for result have been ignored). Note that the every simply introduces failure.
switch (to(1, 3, &result, sc)) { /* standard signal-handling code */ ... } int sc() { printf("%d\n", result); return A_Resume; }

The final aspect of Icon expression evaluation that must be dealt with is that of bounded expressions. Once execution leaves a bounded expression, that expression cannot be resumed. At this point, the state of the computation with respect to backtracking looks as it did when execution entered the bounded expression. This means that, in generated code, where to go on failure (either by branching to an explicit failure label or by returning an A_Resume signal) must be the same. However, this failure action is only correct in the C function containing the start of the code for the bounded expression. If a function suspended by calling a success continuation, execution is no longer in that original C function. To accommodate this restoration of failure action, execution must return to that original function. This is accomplished by setting up a bounding label in the original C function and allocating a signal that corresponds to the label. When the end of the bounded expression is reached, the signal for the bounding label is returned. When the signal reaches the function containing the label, it is converted into a goto. It can be determined statically which calls must convert which signals. Note that if the bounded expression ends in the original C function, the "return signal" is already in the context of the label. In this case, it is immediately transformed into a goto by the compiler, and there is no real signal handling. Consider the Icon expression
move(1); ...

The move function suspends and the C function implementing it needs a success continuation. In this case, move is called in a bounded context, so the success continuation must return execution to the function that called move. The continuation makes use of the fact that, like the C function for to, the one for move only intercepts A_Resume signals and passes all other signals on to its caller. This expression can be implemented with code similar to the following. There are two possible signals that might be returned. move itself might produce an A_Resume signal or it might pass along the bounding signal from the success continuation. Note that for a compound expression, both the bounding label and the failure label are the same. In general, this is not true. In this context, the result of move(1) is discarded. The variable trashcan receives this value; it is never read.

1#3

switch (move(1, &trashcan, sc)) { case 1: goto L1; case A_Resume: goto L1; } L1: /* bounding label & failure label */ ... int sc() { return 1; /* bound signal */ }

Calling Conventions

This discussion has touched on the subject of calling conventions for run-time routines. In Icon, it is, in general, impossible to know until run time what an invocation is invoking. This is handled in the compiler with a standard calling convention for the C functions implementing operations and procedures. This calling convention allows a C function to be called without knowing anything about the operation it implements. A function conforming to the standard calling convention has four parameters. These parameters are, in order of appearance, the number of Icon arguments (a C int), a pointer to the beginning of an array of descriptors holding the Icon arguments, a pointer to the descriptor used as the Icon result location, and a success continuation to use for suspension. The function itself is responsible for any argument conversions including dereferencing, and for argument list adjustment. As explained above, the function returns an integer signal. The function is allowed to return the signals A_Resume, A_Continue, and any signals returned by the success continuation. It may ignore the success continuation if it does not suspend. The function may be passed a null continuation. This indicates that the function will not be resumed. In this case, suspend acts like a simple return, passing back the signal A_Continue (this is not shown in the examples). The outline of a standard-conforming function is
int function-name(nargs, args, result, succ_cont) int nargs; dptr args; dptr result; continuation succ_cont; { ... }

continuation is defined to be a pointer to a function taking no arguments and returning an integer. Later sections of this dissertation describe the code generation process in more detail and describe optimizations of various parts of the code including parameter passing, continuations, signal handling, and branching.

1#4

1#5

Chapter 15: The Type Inferencing Model


Three sections of this dissertation are devoted to type inferencing: two chapters and an appendix. This chapter develops a theoretical model of type inferencing for Icon. For simplicity, it ignores some features of the language. This chapter presents intuitive arguments for the correctness of the formal model. Chapter 19 describes the actual implementation of type inferencing in the Icon compiler. The implementation handles the full Icon language and, for pragmatic reasons, differs from the theoretical model in some details. This chapter starts with the motivation for performing type inferencing. It then describes the concept of abstract interpretation. This concept is used as a tool in this chapter to develop a type inferencing system from Icon's semantics. This chapter gives an intuitive presentation of this development process before presenting the formal models of abstract semantics for Icon. The most abstract of the formal models is the type inferencing system.

15.1 Motivation
Variables in the Icon programming language are untyped. That is, a variable may take on values of different types as the execution of a program proceeds. In the following example, x contains a string after the read (if the read succeeds), but it is then assigned an integer or real, provided the string can be converted to a numeric type.
x := read() if numeric(x) then x +:= 4

In general, it is impossible to know the type of an operator's operands at translation time, so some type checking must be done at run time. This type checking may result in type conversions, run-time errors, or the selection among polymorphous operations (for example, the selection of integer versus real addition). In the Icon interpreter system, all operators check all of their operands at run time. This incurs significant overhead. Much of this run-time type checking is unnecessary. An examination of typical Icon programs reveals that the types of most variables remain consistent throughout execution (except for the initial null value) and that these types can often be determined by inspection. Consider
if x := read() then y := x || ";"

Clearly both operands of || are strings so no checking or conversion is needed. The goal of a type inferencing system is to determine what types variables may take on during the execution of a program. It associates with each variable usage a set of the possible types of values that variable might have when execution reaches the usage. This set may be a conservative estimate (overestimate) of the actual set of possible types that a variable may take on because the actual set may not be computable, or because an analysis to compute the actual set may be too expensive. However, a good type inferencing system operating on realistic programs can determine the exact set of types for most operands and the majority of these sets in fact contain single types, which is the information needed to generate code without type checking. The Icon compiler has an effective type inferencing system based on data flow analysis techniques.

1#

15.2 Abstract Interpretation


Data flow analysis can be viewed as a form of abstract interpretation [.absintrp.]. This can be particularly useful for understanding type inferencing. A "concrete" interpreter for a language implements the standard (operational) semantics of the language, producing a sequence of states, where a state consists of an execution point, bindings of program variables to values, and so forth. An abstract interpreter does not implement the semantics, but rather computes information related to the semantics. For example, an abstract interpretation may compute the sign of an arithmetic expression rather than its value. Often it computes a "conservative" estimate for the property of interest rather than computing exact information. Data flow analysis is simply a form of abstract interpretation that is guaranteed to terminate. This chapter presents a sequence of approximations to Icon semantics, culminating in one suitable for type inferencing. Consider a simplified operational semantics for Icon, consisting only of program points (with the current execution point maintained in a program counter) and variable bindings (maintained in an environment). As an example of these semantics, consider the following program. Four program points are annotated with numbers using comments (there are numerous intermediate points not annotated).
procedure main() local s, n # 1: s := read() # 2: every n := 1 to 2 do { # 3: write(s[n]) } # 4: end

If the program is executed with an input of abc, the following states are included in the execution sequence (only the annotated points are listed). States are expressed in the form program point: environment.
1: [s = null, n = null]
2: [s = "abc", n = null]
3: [s = "abc", n = 1]
3: [s = "abc", n = 2]
4: [s = "abc", n = 2]

It is customary to use the collecting semantics of a language as the first abstraction (approximation) to the standard semantics of the language. The collecting semantics of a program is defined in Cousot and Cousot [.absintrp.] (they use the term static semantics) to be an association between program points and the sets of environments that can occur at those points during all possible executions of the program. Once again, consider the previous example. In general, the input to the program is unknown, so the read function is assumed to be capable of producing any string. Representing this general case, the set of environments (once again showing only variable bindings) that can occur at point 3 is
[s = "", n = 1],
[s = "", n = 2],
[s = "a", n = 1],
[s = "a", n = 2],

1#!

... [s = "abcd", n = 1], [s = "abcd", n = 2], ...

A type inferencing abstraction further approximates this information, producing an association between each variable and a type at each program point. The actual type system chosen for this abstraction must be based on the language and the use to which the information is put. The type system used here is based on Icon's run-time type system. For structure types, the system used retains more information than a simple use of Icon's type system would retain; this is explained in detail later. For atomic types, Icon's type system is used as is. For point 3 in the preceding example the associations between variables and types are
[s = string, n = integer]

The type inferencing system presented in this chapter is best understood as the culmination of a sequence of abstractions to the semantics of Icon, where each abstraction discards certain information. For example, the collecting semantics discards sequencing information among states; in the preceding program, the collecting semantics determines that, at point 3, states may occur with n equal to 1 and with n equal to 2, but does not determine the order in which they must occur. This sequencing information is discarded because desired type information is a static property of the program. The first abstraction beyond the collecting semantics discards dynamic control flow information for goal directed evaluation. The second abstraction collects, for each variable, the value associated with the variable in each environment. It discards information such as, "x has the value 3 when y has the value 7", replacing it with "x may have the value 3 sometime and y may have the value 7 sometime." It effectively decouples associations between variables. This second abstraction associates a set of values with a variable, but this set may be any of an infinite number of sets and it may contain an infinite number of values. In general, this precludes either a finite computation of the sets or a finite representation of them. The third abstraction defines a type system that has a finite representation. This abstraction discards information by increasing the set associated with a variable (that is, making the set less precise) until it matches a type. This third model can be implemented with standard iterative data flow analysis techniques. This chapter assumes that an Icon program consists of a single procedure and that all invocations are to built-in functions. It also assumes that there are no co-expressions beyond the main co-expression. See Chapter 19 for information on how to extend the abstractions to multiple procedures and multiple co-expressions.

15.3 Collecting Semantics


The collecting semantics of an Icon program is defined in terms of a flow graph of the program. A flow graph is a directed graph used to represent the flow of control in a program. Nodes in the graph represent the executable primitives in the program. An edge exists from node A to node B if it is possible for execution to pass directly from the primitive represented by node A to the primitive represented by node B. Cousot and Cousot [.absintrp.] prove that the collecting semantics of a program can be represented as the least fixed point of a set of equations defined over the edges of the program's flow graph. These equations operate on sets of environments.

1#"

For an example of a flow graph, consider the Icon program


procedure main() every write(1 to 3) end

The diagram below on the left shows the abstract syntax tree for this procedure, including the implicit fail at the end of the procedure. The invoke node in the syntax tree represents procedure invocation. Its first argument must evaluate to the procedure to be invoked; in this case the first argument is the global variable write. The rest of the arguments are used as the arguments to the procedure. pfail represents procedure failure (as opposed to expression failure within a procedure). Nodes corresponding to operations that produce values are numbered for purposes explained below. A flow graph can be derived from the syntax tree. This is shown on the right.

The node labeled procedure main is the start node for the procedure; it performs any necessary initializations to establish the execution environment for the procedure. The edge from invoke to to is a resumption path induced by the control structure every. The path from to to pfail is the failure path for to. It is a forward execution path rather than a resumption path because the compound expression (indicated by ;) limits backtracking out of its left-hand sub-expression. Chapter 7 describes how to determine the edges of the flow graph for an Icon program. Both the standard semantics and the abstract semantics must deal with the intermediate results of expression evaluation. A temporary-variable model is used because it is more convenient for this analysis than a stack model. This decision is unrelated to the use of a temporary-variable model in the compiler. This analysis uses a trivial assignment of temporary variables to intermediate results. Temporary variables are not reused. Each node that produces a result is assigned some temporary variable ri in the environment. Assuming that temporary variables are assigned to the example according to the node numbering, the to operation has the effect of
r3 := r4 to r5

1##

Expressions that represent alternate computations must be assigned the same temporary variable, as in the following example for the subexpression x := ("a" | "b"). The syntax tree is shown below on the left and the flow graph is shown on the right.

The if and case control structures are handled similarly. In addition to temporary variables for intermediate results, some generators may need additional temporary variables to hold internal states during suspension. It is easy to devise a scheme to allocate them where they are needed; details are not presented here. The syntax tree is kept during abstract interpretation and used to determine the temporary variables associated with an operation and its operands. The equations that determine the collecting semantics of the program are derived directly from the standard semantics of the language. The set of environments on an edge of the flow graph is related to the sets of environments on edges coming into the node at the head of this edge. This relationship is derived by applying the meaning of the node (in the standard semantics) to each of the incoming environments. It requires a rather complex environment to capture the full operational semantics (and collecting semantics) of a language like Icon. For example, the environment needs to include a representation of the external file system. However, later abstractions only use the fact that the function read produces strings. This discussion assumes that it is possible to represent the file system in the environment, but does not give a representation. Other complexities of the environment are discussed later. For the moment, examples only show the bindings of variables to unstructured (atomic) values. As an example of environments associated with the edges of a flow graph, consider the assignment at the end of the following code fragment. The comments in the if expression are assertions that are assumed to hold at those points in the example.
if x = 7 then { ... # x is 7 and y is 3 } else { ... # (x is null and y is 1) or (x is "abc" and y is 2) }

2&&

x := y + 2

Because of the preceding if expression, there are two paths reaching the assignment. The diagram below shows the flow graph and accompanying environments for the expression; the diagram ignores the fact that the assignment expression requires several primitive operations to implement.

For a conditional expression, an incoming environment is propagated to the path that it would cause execution to take in the standard semantics. This requires distinguishing the paths to be taken on failure (backtracking paths) from those to be taken on success. The following diagram shows an example of this.

In general there may be several possible backtracking paths. The environments in the standard and collecting semantics need to include a stack of current backtracking points and control flow information, and the flow graph needs instructions to maintain this stack. The Icon interpreter system described in Part I is an example of how this information can be maintained. However, the first abstraction to the collecting semantics eliminates the need for this information, so the information is not presented in detail here.

15.4 Model 1: Eliminating Control Flow Information


The first abstraction involves taking the union of the environments propagated along all the failure paths from a node in the collecting semantics and propagating that union along each of the failure paths in the new abstraction. This abstraction eliminates the stack of backtracking points from the environment. A more formal definition for this model requires taking a closer look at Icon data values, especially those values with internal structure. In order to handle Icon data objects with pointer semantics, an environment needs more than variable bindings. This fact is important to type inferencing. The problem is handled by including two components in the

2&1

environment. The first is the store, which maps variables to values. Variables include named variables, temporary variables, and structure variables. Named variables correspond to program identifiers. Temporary variables hold intermediate results as discussed above. Structure variables are elements of structures such as lists. Note that the sets of named variables and temporary variables are each finite (based on the assumption that a program consists of a single non-recursive procedure; as mentioned earlier, this assumption is removed in Chapter 19), but for some non-terminating programs, the set of structure variables may be infinite. Program variables include both named variables and structure variables but not temporary variables. Values include atomic data values such as integers, csets, and strings. They also include pointers that reference objects with pointer semantics. In addition to the values just described, temporary variables may contain references to program variables. These variable references may be used by assignments to update the store or they may be dereferenced by other operations to obtain the values stored in the variables. The second part of the environment is the heap. It maps pointers to the corresponding data objects (this differs from the heap in the Icon implementation in that that heap also contains some data objects that do not have pointer semantics). For simplicity, the only data type with pointer semantics included in this discussion is the list. A list is a partial mapping from integers to variables. Representing other data types with pointer semantics is straightforward; this is discussed in Chapter 19. The first abstraction is called Model 1. The notations envir[n], store[n], and heap[n] refer to the sets of possible environments, stores, and heaps respectively in model n. For example, envir[1] is the set of possible environments in the first abstraction. In the following set of definitions, $X \times Y$ is the set of ordered pairs where the first value in the pair is from $X$ and the second value is from $Y$. $X \to Y$ is the set of partial functions from $X$ to $Y$. The definition of the set of possible environments for model 1 is
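$\mathit{envir}[1] = \mathit{store}[1] \times \mathit{heap}[1]$
$\mathit{store}[1] = \mathit{variables} \to \mathit{values}$
$\mathit{values} = \mathit{integers} \cup \mathit{strings} \cup \dots \cup \mathit{pointers} \cup \mathit{variables}$
$\mathit{heap}[1] = \mathit{pointers} \to \mathit{lists}$, where $\mathit{lists} = \mathit{integers} \to \mathit{variables}$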

For example, the expression


a := ["abc"]

creates a list of one element whose value is the string abc and assigns the list to the variable a. Let p1 be the pointer to the list and let v1 be the (anonymous) variable within the list. The resulting environment, $e \in \mathit{envir}[1]$, might be
$e = (s, h)$, where $s \in \mathit{store}[1]$, $h \in \mathit{heap}[1]$
$s(a) = p_1$
$s(v_1) = \texttt{"abc"}$
$h(p_1) = L_1$, where $L_1 \in \mathit{lists}$
$L_1(1) = v_1$

If the statement
a[1] := "xyz"

is executed, the subscripting operation dereferences a producing p1, then uses the heap to find L1, which it applies to 1 to produce the result v1. The only change in the environment at this point is to temporary variables that are not shown. The assignment then updates the store, producing

2&2

e1 = (s1, h) s1(a) = p1 s1(v1) = "xyz"

Assignment does not change the heap. On the other hand, the expression
put(a, "xyz")

adds the string xyz to the end of the list; if it is executed in the environment e, it alters the heap along with adding a new variable to the store.
e1 = (s1, h1) s1 (a) = p1 s1 (v1) = "abc" s1 (v2) = "xyz" h1 (p1) = L2 L2(1) = v1 L2(2) = v2

If a formal model were developed for the collecting semantics, it would have an environment similar to the one in Model 1. However, it would need a third component with which to represent the backtracking stack.

15.5 Model 2: Decoupling Variables


The next approximation to Icon semantics, Model 2, takes all the values that a variable might have at a given program point and gathers them together. In general, a variable may have the same value in many environments, so this, in some sense, reduces the amount of space required to store the information (though the space may still be unbounded). The "cost" of this reduction of storage is that any information about the relationship of values between variables is lost. Model 2 is also defined in terms of environments, stores, and heaps, although they are different from those of Model 1. A store in Model 2 maps sets of variables to sets of values; each resulting set contains the values associated with the corresponding variables in environments in Model 1. Similarly, a heap in Model 2 maps sets of pointers to sets of lists; each of these sets contains the lists associated with the corresponding pointers in environments in Model 1. An environment in Model 2 contains a store and a heap, but unlike in Model 1, there is only one of these environments associated with each program point. The environment is constructed so that it effectively "contains" the environments in the set associated with the point in Model 1. The definition of Model 2 is
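$\mathit{envir}[2] = \mathit{store}[2] \times \mathit{heap}[2]$
$\mathit{store}[2] = 2^{\mathit{variables}} \to 2^{\mathit{values}}$
$\mathit{heap}[2] = 2^{\mathit{pointers}} \to 2^{\mathit{lists}}$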

In Model 1, operations produce elements from the set values. In Model 2, operations produce subsets of this set. It is in this model that read is taken to produce the set of all strings and that the existence of an external file system can be ignored. Suppose a program point is annotated with the set containing the following two environments from Model 1.
$e_1, e_2 \in \mathit{envir}[1]$
$e_1 = (s_1, h_1)$
$s_1(x) = 1$
$s_1(y) = p_1$
$h_1(p_1) = L_1$

2&3

$e_2 = (s_2, h_2)$
$s_2(x) = 2$
$s_2(y) = p_1$
$h_2(p_1) = L_2$

Under Model 2 the program point is annotated with the single environment $\hat{e} \in \mathit{envir}[2]$, where
$\hat{e} = (\hat{s}, \hat{h})$
$\hat{s}(\{x\}) = \{1, 2\}$
$\hat{s}(\{y\}) = \{p_1\}$
$\hat{s}(\{x, y\}) = \{1, 2, p_1\}$
$\hat{h}(\{p_1\}) = \{L_1, L_2\}$

Note that a store in Model 2 is distributive over union. That is,
$\hat{s}(X \cup Y) = \hat{s}(X) \cup \hat{s}(Y)$

so listing the result of $\hat{s}(\{x, y\})$ is redundant. A heap in Model 2 also is distributive over union. In going to Model 2 information is lost. In the last example, the fact that x = 1 is paired with p1 = L1 and x = 2 is paired with p1 = L2 is not represented in Model 2. Just as read is extended to produce a set of values, so are all other operations. These "extended" operations are then used to set up the equations whose solution formally defines Model 2. This extension is straightforward. For example, the result of applying a unary operator to a set is the set obtained by applying the operator to each of the elements in the operand. The result of applying a binary operator to two sets is the set obtained by applying the operator to all pairs of elements from the two operands. Operations with more operands are treated similarly. For example
{1, 3, 5} + {2, 4} = {1 + 2, 1 + 4, 3 + 2, 3 + 4, 5 + 2, 5 + 4} = {3, 5, 5, 7, 7, 9} = {3, 5, 7, 9}

The loss of information mentioned above affects the calculation of environments in Model 2. Suppose the addition in the last example is from
z := x + y

and that Model 1 has the following three environments at the point before the calculation
[x = 1, y = 2, z = 0] [x = 3, y = 2, z = 0] [x = 5, y = 4, z = 0]

After the calculation the three environments will be


[x = 1, y = 2, z = 3] [x = 3, y = 2, z = 5] [x = 5, y = 4, z = 9]

If these latter three environments are translated into an environment of Model 2, the result is
[x = {1, 3, 5}, y = {2, 4}, z = {3, 5, 9}]

However, when doing the computation using the semantics of + in Model 2, the value for z is {3, 5, 7, 9}. The solution to the equations in Model 2 overestimates (that is, gives a conservative estimate for) the values obtained by computing a solution using Model 1 and translating it into the domain of Model 2.

2&4

Consider the following code with respect to the semantics of assignment in Model 2. (Assume that the code is executed once, so only one list is created.)
x := [10, 20] i := if read() then 1 else 2 x[i] := 30

After the first two assignments, the store maps x to a set containing one pointer and maps i to a set containing 1 and 2. The third assignment is not as straightforward. Its left operand evaluates to two variables; the most that can be said about one of these variables after the assignment is that it might have been assigned 30. If (s, h) is the environment after the third assignment then
s({x}) = { p1 } s({i}) = {1, 2} s({v1}) = {10, 30} s({v2}) = {20, 30} h({p1}) = {L1} L1(1) = v1 L1(2) = v2

Clearly all assignments could be treated as weak updates [.pntstr.], where a weak update is an update that may or may not take place. However, this would involve discarding too much information; assignments would only add to the values associated with variables and not replace the values. Therefore assignments where the left hand side evaluates to a set containing a single variable are treated as special cases. These are implemented as strong updates.

15.6 Model 3: A Finite Type System


The environments in Model 2 can contain infinite amounts of information, as in the program
x := 1 repeat x +:= 1

where the set of values associated with x in the loop consists of all the counting numbers. Because equations in Model 2 can involve arbitrary arithmetic, no algorithm can find the least fixed point of an arbitrary set of these equations. The final step is to impose a finitely representable type system on values. A type is a (possibly infinite) set of values. The type system presented here includes three classifications of basic types. The first classification consists of the Icon types without pointer semantics: integers, strings, csets, etc. The second classification groups pointers together according to the lexical point of their creation. This is similar to the method used to handle recursive data structures in Jones and Muchnick [.analrcsv.]. Consider the code
every insert(x, [1 to 5])

If this code is executed once, five lists are created, but they are all created at the same point in the program, so they all belong to the same type. The intuition behind this choice of types is that structures created at the same point in a program are likely to have components of the same type, while structures created at different points in a program may have components of different types. The third classification of basic types handles variable references. Each named variable and temporary variable is given a type to itself. Therefore, if a is a named variable, {a} is a

2&5

type. Structure variables are grouped into types according to the program point where the pointer to the structure is created. This is not necessarily the point where the variable is created; in the following code, a pointer to a list is created at one program point, but variables are added to the list at different points
x := [] push(x, 1) push(x ,2)

References to these variables are grouped into a type associated with the program point for [], not the point for the corresponding push. If a program contains k non-structure variables and there are n locations where pointers can be created, then the basic types for the program are integer, string, ..., $P_1, \dots, P_n$, $V_1, \dots, V_n$, $\{v_1\}, \dots, \{v_k\}$ where $P_i$ is the pointer type created at location i, $V_i$ is the variable type associated with $P_i$, and $v_i$ is a named variable or a temporary variable. Because programs are lexically finite they each have a finite number of basic types. The set of all types for a program is the smallest set that is closed under union and contains the empty set along with the basic types:

$\mathit{types} = \{\{\}, \mathit{integers}, \mathit{strings}, \dots, (\mathit{integers} \cup \mathit{strings}), \dots, (\mathit{integers} \cup \mathit{strings} \cup \dots \cup \{v_k\})\}$

Model 3 replaces the arbitrary sets of values of Model 2 by types. This replacement reduces the precision of the information, but allows for a finite representation and allows the information to be computed in finite time. In Model 3, both the store and the heap map types to types. This store is referred to as the type store. The domain of type store is variable types, that is, those types whose only values are variable references. Similarly, the domain of the heap is pointer types. Its range is the set of types containing only structure variables. A set of values from Model 2 is converted to a type in Model 3 by mapping that set to the smallest type containing it. For example, the set
{1, 4, 5, "23", "0"}

is mapped to
$\mathit{integer} \cup \mathit{string}$

The definition of envir[3] is


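$\mathit{envir}[3] = \mathit{store}[3] \times \mathit{heap}[3]$
$\mathit{store}[3] = \mathit{variable\text{-}types} \to \mathit{types}$
$\mathit{heap}[3] = \mathit{pointer\text{-}types} \to \mathit{structure\text{-}variable\text{-}types}$
$\mathit{types} \subseteq 2^{\mathit{values}}$
$\mathit{variable\text{-}types} \subseteq \mathit{types}$
$\mathit{structure\text{-}variable\text{-}types} \subseteq \mathit{variable\text{-}types}$
$\mathit{pointer\text{-}types} \subseteq \mathit{types}$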

There is exactly one variable type for each pointer type in this model. The heap simply consists of this one-to-one mapping; the heap is of the form
h( Pi ) = Vi

This mapping is invariant over a given program. Therefore, the type equations for a program can be defined over store[3] rather than envir[3] with the heap embedded within the type equations. Suppose an environment from Model 2 is
$e \in \mathit{envir}[2]$

2&

e = (s, h) s({a}) = { p1 , p2} s({v1}) = {1, 2} s({v2}) = {1} s({v3}) = {12.03} h({p1}) = {L1, L2} h({p2}) = {L3} L1(1) = v1 L2(1) = v1 L2(2) = v2 L3(1) = v3

Suppose the pointers p1 and p2 are both created at program point 1. Then the associated pointer type is P1 and the associated variable type is V1. The corresponding environment in Model 3 is
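$\hat{e} \in \mathit{envir}[3]$
$\hat{e} = (\hat{s}, \hat{h})$
$\hat{s}(\{a\}) = P_1$
$\hat{s}(V_1) = \mathit{integer} \cup \mathit{real}$
$\hat{h}(P_1) = V_1$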

The collecting semantics of a program establishes a set of (possibly) recursive equations between the sets of environments on the edges of the program's flow graph. The collecting semantics of the program is the least fixed point of these equations in which the set on the edge entering the start state contains all possible initial environments. Similarly, type inferencing establishes a set of recursive equations between the type stores on the edges of the flow graph. The least fixed point of these type inferencing equations is computable using iterative methods. This is discussed in Chapter 19. The fact that these equations have solutions is due to the fact that the equations in the collecting semantics have a solution and the fact that each abstraction maintains the "structure" of the problem, simply discarding some details. Chapter 19 also extends type inferencing to handle the entire Icon language. Chapter 22 uses the information from type inferencing to optimize the generated code.

2&!

Chapter 16: Liveness Analysis of Intermediate Values


The maintenance of intermediate values during expression evaluation in the Icon programming language is more complicated than it is for conventional languages such as C and Pascal. O'Bagy explains this in her dissertation [.tr88-31.]: "Generators prolong the lifetime of temporary values. For example, in i = find(s1,s2) the operands of the comparison operation cannot be discarded when find produces its result. If find is resumed, the comparison is performed again with subsequent results from find(s1,s2), and the left operand must still be available." In some implementation models, it is equally important that the operands of find still be available if that function is resumed (this depends on whether the operand locations are used during resumption or whether all needed values are saved in the local state of the function). As noted in Chapter 14, a stack-based model handles the lifetime problem dynamically. However, a temporary-variable model like the one used in this compiler requires knowledge at compile-time of the lifetime of intermediate values. In a straightforward implementation of conventional languages, liveness analysis of intermediate values is trivial: an intermediate value is computed in one place in the generated code, is used in one place, and is live in the contiguous region between the computation and the use. In such languages, determining the lifetime of intermediate values only becomes complicated when certain optimizations are performed, such as code motion and common subexpression elimination across basic blocks [.dragonbk,progflw.]. This is not true in Icon. In the presence of goal-directed evaluation, the lifetime of an intermediate value can extend beyond the point of use. Even in a straightforward implementation, liveness analysis is not trivial. In its most general form, needed in the presence of the optimizations mentioned above, liveness analysis requires iterative methods. However, goal-directed evaluation imposes enough structure on the liveness problem that, at least in the absence of optimizations, iterative methods are not needed to solve it. This chapter presents a simple and accurate method for computing liveness information for intermediate values in Icon. The analysis is formalized in an attribute grammar.

16.1 Implicit Loops


"oal-directed e)al ation e.tends the lifetime of intermediate )al es by creating implicit loops +ithin an e.pression( In 6J/agyJs e.ample, the start of the loop is the generator find and the end of the loop is the comparison that may fail( 1n intermediate )al e may be sed +ithin s ch a loop, b t if its )al e is comp ted before the loop is entered, it is not recomp ted on each iteration and the temporary )ariable m st not be re sed ntil the loop is e.ited( The follo+ing fragment of C code contains a loop and is therefore analogo s to code generated for goal-directed e)al ation( It is sed to demonstrate the li)eness information

2&"

needed by a temporary variable allocator. In the example, v1 through v4 represent intermediate values that must be assigned to program variables.
v1 = f1();
while (--v1) {
   v2 = f2();
   v3 = v1 + v2;
   f3(v3);
   }
v4 = 8;

Separate variables must be allocated for v1 and v2 because they are both needed for the addition. Here, x is chosen for v1 and y is chosen for v2.
x = f1();
while (--x) {
   y = f2();
   v3 = x + y;
   f3(v3);
   }
v4 = 8;

x cannot be used to hold v3, because x is needed in subsequent iterations of the loop. Its lifetime must extend through the end of the loop. y, on the other hand, can be used because it is recomputed in subsequent iterations. Either variable may be used to hold v4.
x = f1();
while (--x) {
   y = f2();
   y = x + y;
   f3(y);
   }
x = 8;

Before temporary variables can be allocated, the extent of the loops created by goal-directed evaluation must be estimated. Suppose O'Bagy's example
i = find(s1, s2)

appears in the following context


procedure p(s1, s2, i)
   if i = find(s1, s2) then return i + *s1
   fail
end

The simplest and most pessimistic analysis assumes that a loop can appear anywhere within the procedure, requiring the conclusion that an intermediate value in the expression may live to the end of the procedure. Christopher's simple analysis [.tccompile.] notices that the expression appears within the control clause of an if expression. This is a bounded context; implicit loops cannot extend beyond the end of the control clause. His allocation scheme reuses, in subsequent expressions, temporary variables used in this control clause. However, it does not determine when temporary variables can be reused within the control clause itself. The analysis presented here locates the operations within the expression that can fail and those that can generate results. It uses this information to accurately determine the loops within the expression and the intermediate values whose lifetimes are extended by those loops.

2&#

16.2 Liveness Analysis


It is instructive to look at a specific example where intermediate values must be retained beyond (in a lexical sense) the point of their use. The following expression employs goal-directed evaluation to conditionally write sentences in the data structure x to an output file. Suppose f is either a file or null. If f is a file, the sentences are written to it; if f is null, the sentences are not written.
every write(\f, !x, ".")

In order to avoid the complications of control structures at this point in the discussion, the following equivalent expression is used in the analysis:
write(\f, !x, ".") & &fail

This expression can be converted into a sequence of primitive operations producing intermediate values (v1, v2, ...). This is shown in the following diagram. For convenience, the operations are expressed in Icon, except that the assignments do not dereference their right-hand operands.

9hether or not the program )ariables and constants are act ally placed in temporary )ariables depends on the machine model, implementation con)entions, and +hat optimi7ations are performed( Clearly a temporary )ariable is not needed for Lfail( =o+e)er, temporary )ariables are needed if the s be.pressions are more comple., intermediate )al es are sho+n for all s be.pressions for e.planatory p rposes( 9hen Lfail is e.ec ted, the M operation is res med( This creates an implicit loop from the M to Lfail, as sho+n by the arro+ in the abo)e diagram( The I estion is@ 9hat intermediate )al es m st be retained p to Lfail? 1 more instr cti)e +ay to phrase the I estion is@ 1fter Lfail is e.ec ted, +hat intermediate )al es co ld be re sed +itho t being recomp ted? $rom the seI ence of primiti)e operations, it is clear that the re sed )al es incl de v1 and v3, and, if the element generation operator, M, references its arg ment after res mption, then the re sed )al es incl de v.. v2 is not sed +ithin the loop, v/ and v0 are recomp ted +ithin the loop, and v1 and v2 are not sed( The lines in the diagram to the left of the code indicate the lifetime of the intermediate )al es( The dotted portion of each line represents the region of the lifetime beyond +hat +o ld e.ist in the absence of bac0trac0ing( &i)eness information co ld be comp ted by ma0ing the implicit loops e.plicit then performing a standard li)eness analysis in the form of a global data flo+ analysis( That is nnecessarily e.pensi)e( There is eno gh str ct re in this partic lar li)eness problem that it can be sol)ed d ring the simple analysis reI ired to locate the implicit loops ca sed by goal-directed e)al ation(

21&

*e)eral concepts are needed to describe analyses in)ol)ing e.ec tion order +ithin Icon e.pressions( 3orward execution order is the order in +hich operations +o ld be e.ec ted at r n time in the absence of goal-directed e)al ation and e.plicit loops( "oal-directed e)al ation in)ol)es both fail re and the res mption of s spended generators( The control cla se of an if-then-else e.pression may fail, b t instead of res ming a s spending generator, it ca ses the else cla se to be e.ec ted( This fail re res lts in for+ard e.ec tion order( $or+ard e.ec tion order imposes a partial ordering on operations( It prod ces no ordering bet+een the then and the else cla ses of an if e.pression( 4ac"trac"ing order is the re)erse of for+ard e.ec tion order( This is d e to the &I$6 res mption of s spended generators( The bac0+ard flo+ of control ca sed by looping control str ct res does not contrib te to this li)eness analysis 3intermediate res lts sed +ithin a looping control str ct re are also comp ted +ithin the loop4, b t is dealt +ith in later chapters( The e)ery control str ct re is generally )ie+ed as a looping control str ct re( =o+e)er, it simply introd ces fail re( &ooping only occ rs +hen it is sed +ith a generati)e control cla se, in +hich case the looping is treated the same as goal-directed e)al ation( 1 notation that emphasi7es intermediate )al es, s be.pressions, and e.ec tion order is helpf l for nderstanding ho+ li)eness is comp ted( /oth postfi. notation and synta. trees are inadeI ate( 1 postfi. notation is good for sho+ing e.ec tion order, b t tends to obsc re s be.pressions( The synta. 
tree of an e.pression sho+s s be.pressions, b t e.ec tion order m st be e.pressed in terms of a tree +al0( In both representations, intermediate )al es are implicit( $or this disc ssion, an intermediate representation is sed( 1 s be.pression is represented as a list of e.plicit intermediate )al es follo+ed by the operation that ses them, all enclosed in o)als( /elo+ each intermediate )al e is the s be.pression that comp tes it( This representation is referred to as a postfix tree( The postfi. tree for the e.ample abo)e is@

In this notation, the for+ard e.ec tion order of operations 3+hich incl des constants and references to program )ariables4 is left-to-right and the bac0trac0ing order is right-to-left( In this e.ample, the bac0trac0ing order is Lfail, in)o0e, 2(2, M, ., b, f, and +rite( 1s e.plained abo)e, the se of an intermediate )al e m st appear in an implicit loop for the )al e to ha)e an e.tended lifetime( T+o e)ents are needed to create s ch a loop( $irst, an operation m st fail, initiating bac0trac0ing( *econd, an operation m st be res med, ca sing e.ec tion to proceed for+ard again( This analysis comp tes the ma.im m lifetime of intermediate )al es in the e.pression, so it only needs to comp te the rightmost operation 3+ithin a bo nded e.pression4 that can fail( This represents the end of the farthest reaching loop( 6nce e.ec tion proceeds beyond this point, no intermediate )al e can be re sed(

211

The intermediate values of a subexpression are used at the end of the subexpression. For example, invoke uses the intermediate values v1, v3, v5, and v6; the following figure shows these intermediate results and the operation in isolation.

In order for these ses to be in a loop, bac0trac0ing m st be initiated from o tside, that is, beyond the s be.pression 3in the e.ample, only Lfail and L are beyond the s be.pression4( In addition, for an intermediate )al e to ha)e an e.tended lifetime, the beginning of the loop m st start after the intermediate )al e is comp ted( T+o conditions may create the beginning of a loop( $irst, the operation itself may be res med( In this case, e.ec tion contin es for+ard +ithin the operation( It may re se any of its operands and none of them are recomp ted( The operation does not ha)e to act ally generate more res lts( $or e.ample, re)ersible s+ap 3the operator c-T4 can be res med to re se both of its operands, b t it does not generate another res lt( 9hether an operation act ally re ses its operands on res mption depends on its implementation( In the Icon compiler, operations implemented +ith a C f nction sing the standard calling con)entions al+ays se copies of operands on res mption, b t implementations tailored to a partic lar se often reference operand locations on res mption( &i)eness analysis is presented here as if all operations re se their operands on res mption( In the act al implementation, li)eness analysis comp tes a separate lifetime for )al es sed internally by operations and the code generator decides +hether this lifetime applies to operands( This internal lifetime may also be sed +hen allocating tended descriptors for )ariables declared local to the in-line code for an operation( The beha)ior of the temporary-)ariable model presented in this dissertation can be compared +ith one de)eloped by #ilsen and >artine0 Q(martine0(R, it also relies on the li)eness analysis described in this chapter( The second +ay to create the beginning of a loop is for a s be.pression to generate res lts( C.ec tion contin es for+ard again and any intermediate )al es to the left of the generati)e s be.pression may be re sed +itho t being recomp ted( Eemember, bac0trac0ing is 
initiated from o tside the e.pression( * ppose an e.pression that can fail is associated +ith v0, in the pre)io s fig re( This creates a loop +ith the generator associated +ith v/( =o+e)er, this partic lar loop does not incl de in)o0e and does not contrib te to the re se of v1 or v3( 1 res mable operation and generati)e s be.pressions are all resumption points +ithin an e.pression( 1 simple r le can be sed to determine +hich intermediate )al es of an e.pression ha)e e.tended lifetimes@ If the e.pression can be res med, the intermediate )al es +ith e.tended lifetimes consist of those to the left of the rightmost res mption point of the e.pression( This r le refers to the \\top le)elJJ intermediate )al es( The r le m st be applied rec rsi)ely to s be.pressions to determine the lifetime of lo+er le)el intermediate )al es( It sometimes may be necessary to ma0e conser)ati)e estimates of +hat can fail and of res mption points 3for li)eness analysis, it is conser)ati)e to o)erestimate +hat can fail or be res med4( $or e.ample, in)ocation may or may not be res mable, depending on +hat is being in)o0ed and, in general, it cannot be 0no+n ntil r n time +hat is being in)o0ed 3for the p rposes of this e.ample analysis, it is ass med that the )ariable +rite is not changed any+here in the program4(

212

In the e.ample, the rightmost operation that can fail is Lfail( Ees mption points are M and the s be.pressions corresponding to the intermediate )al es v/ and v1( 6nce the res mption points ha)e been identified, the r le for determining e.tended lifetimes can be applied( If there are no res mption points in an e.pression, no intermediate )al es in that e.pression can be re sed( 1pplying this r le to the postfi. tree abo)e yields v1, v3, and v. as the intermediate )al es that ha)e e.tended lifetimes( *imilar techniI es can be sed for li)eness analysis of Prolog programs, +here goaldirected e)al ation also creates implicit loops( 6ne difference is that a Prolog cla se is a linear seI ence of calls( It does not need to be \\lineari7edJJ by constr ction a postfi. tree( 1nother difference is that all intermediate )al es in Prolog programs are stored in e.plicit )ariables( 1 Prolog )ariable has a lifetime that e.tends to the right of its last se if an implicit loops starts after the )ariableJs first se and ends after the )ariableJs last se(

16.3 An Attribute Grammar


To cast this approach as an attribute grammar, an expression should be thought of in terms of an abstract syntax tree. The transformation from a postfix tree to a syntax tree is trivial. It is accomplished by deleting the explicit intermediate values. A syntax tree for the example is:

Several interpretations can be given to a node in a syntax tree. A node can be viewed as representing either an operation, an entire subexpression, or an intermediate value. This analysis associates four attributes with each node (this ignores attributes needed to handle break expressions). The goal of the analysis is to produce the lifetime attribute. The other three attributes are used to propagate information needed to compute the lifetime.

resumer is either the rightmost operation (represented as a node) that can initiate backtracking into the subexpression or it is null if the subexpression cannot be resumed.

213

failer is related to resumer. It is the rightmost operation that can initiate backtracking that can continue past the subexpression. It is the same as resumer, unless the subexpression itself contains the rightmost operation that can fail.

gen is a boolean attribute. It is true if the subexpression can generate multiple results if resumed.

lifetime is the operation beyond which the intermediate value is no longer needed. It is either the parent node, the resumer of the parent node, or null. The lifetime is the parent node if the value is never reused after execution leaves the parent operation. The lifetime is the resumer of the parent if the parent operation or a generative sibling to the right can be resumed. A lifetime of null is used to indicate that the intermediate value is never used. For example, the value of the control clause of an if expression is never used.

Attribute computations are associated with productions in the grammar. The attribute computations for failer and gen are always for the non-terminal on the left-hand side of the production. These values are then used at the parent production; they are effectively passed up the syntax tree. The computations for resumer and lifetime are always for the attributes of non-terminals on the right-hand side of the production. resumer is then used at the productions defining these non-terminals; it is effectively passed down the syntax tree. lifetime is usually saved just for the code generator, but it is sometimes used by child nodes.

16.4 Primary Expressions


Variables, literals, and keywords are primary expressions. They have no subexpressions, so their productions contain no computations for resumer or lifetime. The attribute computations for a literal follow. A literal itself cannot fail, so backtracking only passes beyond it if the backtracking was initiated before (to the right of) it. A literal cannot generate multiple results.
expr ::= literal {
   expr.failer := expr.resumer
   expr.gen := false
   }

Another example of a primary expression is the keyword &fail. Execution cannot continue past &fail, so it must be the rightmost operation within its bounded expression that can fail. A pre-existing attribute, node, is assumed to exist for every symbol in the grammar. It is the node in the syntax tree that corresponds to the symbol.
expr ::= &fail {
   expr.failer := expr.node
   expr.gen := false
   }

16.5 Operations with Subexpressions


Addition provides an example of the attribute computations involving subexpressions. The following diagram shows how resumer, failer, and gen information would be passed through the postfix tree.

214

This information would then be used to compute lifetime information for v1 and v2. The next figure shows how the attribute information is actually passed through the syntax tree.

The lifetime attributes are computed for the roots of the subtrees for expr1 and expr2. The details of the attribute computations depend, in part, on the characteristics of the individual operation. Addition does not fail, so the rightmost resumer, if there is one, of expr2 is the rightmost resumer of the entire expression. The rightmost resumer of expr1 is the rightmost operation that can initiate backtracking that continues past expr2. Addition does not suspend, so the lifetime of the value produced by expr2 only extends through the operation (that is, it always is recomputed in the presence of goal-directed evaluation). If expr2 is a generator, then the result of expr1 must be retained for as long as expr2 might be resumed. Otherwise, it need only be retained until the addition is performed. expr1 is the first thing executed in the expression, so its failer is the failer for the entire expression. The expression is a generator if either expr1 or expr2 is a generator (note that the operation | is logical or, not Icon's alternation control structure):
expr ::= expr1 + expr2 {
   expr2.resumer := expr.resumer
   expr2.lifetime := expr.node
   expr1.resumer := expr2.failer
   if expr2.gen & (expr.resumer ≠ null) then
      expr1.lifetime := expr.resumer
   else
      expr1.lifetime := expr.node
   expr.failer := expr1.failer
   expr.gen := (expr1.gen | expr2.gen)
   }

/expr provides an example of an operation that can fail. If there is no rightmost resumer of the entire expression, it is the rightmost resumer of the operand. The lifetime of the

215

operand is simply the operation, by the same argument used for expr2 of addition. The computation of failer is also analogous to that of addition. The expression is a generator if the operand is a generator:
expr ::= /expr1 {
   if expr.resumer = null then
      expr1.resumer := expr.node
   else
      expr1.resumer := expr.resumer
   expr1.lifetime := expr.node
   expr.failer := expr1.failer
   expr.gen := expr1.gen
   }

!expr differs from /expr in that it can generate multiple results. If it can be resumed, the result of the operand must be retained through the rightmost resumer:
expr ::= !expr1 {
   if expr.resumer = null then {
      expr1.resumer := expr.node
      expr1.lifetime := expr.node
      }
   else {
      expr1.resumer := expr.resumer
      expr1.lifetime := expr.resumer
      }
   expr.failer := expr1.failer
   expr.gen := true
   }

16.6 Control Structures


Other operations follow the general pattern of the ones presented above. Control structures, on the other hand, require unique attribute computations. In particular, several control structures bound subexpressions, limiting backtracking. For example, not bounds its argument and discards the value. If it has no resumer, then it is the rightmost operation that can fail. The expression is not a generator:
expr ::= not expr1 {
   expr1.resumer := null
   expr1.lifetime := null
   if expr.resumer = null then
      expr.failer := expr.node
   else
      expr.failer := expr.resumer
   expr.gen := false
   }

expr1; expr2 bounds expr1 and discards the result. Because the result of expr2 is the result of the entire expression, the code generator makes their result locations synonymous. This is reflected in the lifetime computations. Indeed, all the attributes of expr2 and those of the expression as a whole are the same:
expr ::= expr1 ; expr2 { expr1.resumer := null expr1.lifetime := null expr2.resumer := expr.resumer

21

expr2.lifetime := expr.lifetime expr.failer := expr2.failer expr.gen := expr2.gen }

A reasonable implementation of alternation places the result of each subexpression into the same location: the location associated with the expression as a whole. This is reflected in the lifetime computations. The resumer of the entire expression is also the resumer of each subexpression. Backtracking out of the entire expression occurs when backtracking out of expr2 occurs. This expression is a generator:
expr ::= expr1 | expr2 {
   expr2.resumer := expr.resumer
   expr2.lifetime := expr.lifetime
   expr1.resumer := expr.resumer
   expr1.lifetime := expr.lifetime
   expr.failer := expr2.failer
   expr.gen := true
   }

The first operand of an if expression is bounded and its result is discarded. The other two operands are treated similar to those of alternation. Because there are two independent execution paths, the rightmost resumer may not be well-defined. However, it is always conservative to treat the resumer as if it is farther right than it really is; this just means that an intermediate value is kept around longer than needed. If there is no resumer beyond the if expression, but at least one of the branches can fail, the failure is treated as if it came from the end of the if expression (represented by the node for the expression). Because backtracking out of an if expression is rare, this inaccuracy is of little practical consequence. The if expression is a generator if either branch is a generator:
expr ::= if expr1 then expr2 else expr3 {
   expr3.resumer := expr.resumer
   expr3.lifetime := expr.lifetime
   expr2.resumer := expr.resumer
   expr2.lifetime := expr.lifetime
   expr1.resumer := null
   expr1.lifetime := null
   if expr.resumer = null & (expr1.failer ≠ null | expr2.failer ≠ null) then
      expr.failer := expr.node
   else
      expr.failer := expr.resumer
   expr.gen := (expr2.gen | expr3.gen)
   }

The do clause of every is bounded and its result discarded. The control clause is always resumed at the end of the loop and can never be resumed by anything else. The value of the control clause is discarded. Ignoring break expressions, the loop always fails:
expr ::= every expr1 do expr2 {
   expr2.resumer := null
   expr2.lifetime := null
   expr1.resumer := expr.node
   expr1.lifetime := null
   expr.failer := expr.node
   expr.gen := false
   }

21!

Handling break expressions requires some stack-like attributes. These are similar to the ones used in the control flow analyses described in O'Bagy's dissertation [.tr88-31.] and the ones used to construct flow graphs in the original technical report on type inferencing. The attributes presented here can be computed with one walk of the syntax tree. At a node, subtrees are processed in reverse execution order: first the resumer and lifetime attributes of a subtree are computed, then the subtree is walked. Next the failer and gen attributes for the node itself are computed, and the walk moves back up to the parent node.

21"

Chapter 17: Overview of the Compiler


17.1 Components of the Compiler
The Icon compiler is divided into two components: a run-time system and the compiler itself. This organization is illustrated below. In the diagram, labeled boxes represent programs, other text (some of it delimited by braces) represents files, and arrows represent data flow.

The run-time system appears above the dotted line and the compiler itself appears below the line. The programs shown with the run-time system are executed once when the run-time system is installed or updated. They build a data base, rt.db, and an object-code library, rt.a, for use by the compiler. The general definition of the term "data base" is used here: a collection of related data. rt.db is stored as a text file. It is accessed and manipulated in internal tables by the programs rtt and iconc. The rtt program is specific to the Icon compiler system and is described below. The C compiler and the library maintenance program are those native to the system on which the Icon compiler is being used. The format of the object-code library is dictated by the linker used with the C compiler. The file rt.h contains C definitions shared by the routines in the run-time system and code generated by the compiler. The diagram of the compiler itself reflects the fact that the Icon compiler uses a C compiler and linker as a back end. However, iconc automatically invokes these programs, so the Icon programmer sees a single tool that takes Icon source as input and produces an executable file.

17.2 The Run-time System


As with the run-time system for the interpreter, the run-time system for the compiler implements Icon's operations. However, the compiler has needs beyond those of the interpreter. In the interpreter's run-time system, all operations are implemented as C functions conforming to certain conventions. The interpreter uses the same implementation

21#

of an operation for all ses of the operation( 9hile this approach res lts in acceptable performance for many Icon programs, the p rpose of an optimi7ing compiler is to obtain better performance( 1 ma5or goal in the de)elopment of iconc is to se information from type inferencing to tailor the parameter passing and parameter type con)ersions of an operation to partic lar ses of the operation and to place code in line +here appropriate( The compiler needs a mechanism to s pport this tailored operation in)ocation( In addition, the compiler needs information abo t the properties of operations for se in performing type inferencing and other analyses( In addition to s pporting the analyses and optimi7ations of iconc, there are se)eral other ma5or goals in the design of the compilerJs r n-time system( These incl de

- Specification of all information about an operation in one place.
- Use of one coding to produce both general and tailored implementation of an operation.
- Use of the pre-existing run-time system as a basis for the new one.

>ost of the design goals for the r n-time system +o ld best be ser)ed by a specialp rpose lang age in +hich to implement the r n-time system( * ch a lang age +o ld allo+ the properties of an operation needed by )ario s analyses to be either e.plicitly coded or easily inferred from parts of the code sed to prod ce an implementation of the operation( The lang age +o ld also allo+ easy recognition and manip lation of parts of the code that need to be tailored to indi)id al ses of an operation( In addition, the lang age +o ld pro)ide s pport for feat res of Icon s ch as its data types and goaldirected e)al ation( 9hile a special-p rpose lang age is consistent +ith most design goals, it is not consistent +ith sing the interpreterJs r n-time system +ritten in C as a basis for that of the compiler( 1 special-p rpose lang age also has the problem that it reI ires a large effort to implement( These conflicting goals are resol)ed +ith a lang age that is a compromise bet+een an ideal special-p rpose implementation lang age and C( The core of the lang age is C, b t the lang age contains special feat res for specifying those aspects of a r n-time operation that m st be dealt +ith by the compiler( This lang age is called the implementation language for the Icon compilerJs r n-time system( /eca se the implementation lang age is designed aro nd C, m ch of the detailed code for implementing an operation can be borro+ed from the interpreter system +ith only minor changes( The important facets of the implementation lang age are disc ssed here( 1 f ll description of the lang age can be fo nd in the reference man al for the lang age Q(ipdF:(R( The core material from this reference man al is incl ded as 1ppendi. 1 of this dissertation(

17.3 The Implementation Language


The implementation lang age is sed to describe the operators, 0ey+ords, and b ilt-in f nctions of Icon( In addition to these operations, the r n-time system contains ro tines to s pport other feat res of Icon s ch as general in)ocation, co-e.pression acti)ation, and storage management( These other ro tines are +ritten in C( The program rtt ta0es as inp t files containing operations coded in the implementation lang age and translates the operations into p re C( rtt also b ilds the data base, rt(db, +ith information abo t the operations( In addition to operations +ritten in the implementation lang age, rtt inp t may contain C f nctions( These C f nctions can se se)eral of the

22&

e.tensions a)ailable to the detailed C code in the operations( These e.tensions are translated into ordinary C code( 9hile not critical to the goals of the r n-time system design, the ability to se these e.tensions in other+ise ordinary C f nctions is )ery con)enient( The definition of an operation is composed of three layers( The o ter layer brac0ets the code for the operation( It consists of a header at the beginning of the code and the reser)ed +ord end at the end of the code( The header may be preceded by an optional description of the operation in the form of a string literal, this description is sed only for doc mentation( The second layer consists of type chec0ing and type con)ersion code( Type chec0ing code may be nested( The inner layer is the detailed C code, abstract type comp tations, and code to handle r n-time errors( 1n abstract type comp tation describes the possible side-effects and res lt types of the operation in a form that type inferencing can se( This feat re is needed beca se it is sometimes impractically diffic lt to ded ce this information from the C code( The code to handle r n-time errors is e.posed, that is, it is not b ried +ithin the detailed C code( /eca se of this, type inferencing can easily determine conditions nder +hich an operation terminates +itho t either prod cing a )al e or failing( 31 f rther reason for e.posing this code is e.plained in the implementation lang age reference man al in the section on scoping(4 1n operation header starts +ith one of the three reser)ed +ords operator, f nction, or 0ey+ord( The header contains a description of the operationJs result se uence, that is, ho+ many res lts it can prod ce( This incl des both the minim m and ma.im m n mber of res lts, +ith indicating an nbo nded n mber( In addition, it indicates, by a trailing N, +hen an operation can be res med to perform a side-effect after it has prod ced its last res lt( This information is some+hat more detailed than can easily be inferred by 
loo0ing at the ret rns, s spends, and fails in the detailed C code( The information is p t in the data base for se by the analysis phases of iconc( 1n operation header also incl des an identifier( This pro)ides the name for b ilt-in f nctions and 0ey+ords( $or all types of operations, rtt ses the identifier to constr ct the names of the C f nctions that implement the operations( The headers for operators also incl de an operator symbol( The parser for iconc is not reI ired to se this symbol for the synta. of the operation, b t for most operations it does so, list creation, Q ...R, is an e.ample of an e.ception( The headers for b ilt-in f nctions and operators incl de a parameter list( The list pro)ides names for the parameters and indicates +hether dereferenced and!or ndereferenced )ersions of the corresponding arg ment are needed by the operation( It also indicates +hether the operation ta0es a )ariable n mber of arg ments( The follo+ing are fi)e e.amples of operation headers(
function{0,1+} move(i)
function{*} bal(c1,c2,c3,s,i,j)
operator{1} [...] llist(elems[n])
operator{0,1} / null(underef x -> dx)
keyword{3} regions

move is a function that takes one argument. It may produce zero or one result and may be resumed to produce a side effect after its last result. bal is a function that takes six arguments. It produces an arbitrary number of results. The list-creation operator is given the symbol [...] (which may be used for string invocation, if string invocation is enabled) and is given the name llist. It takes an arbitrary number of arguments with elems being the array of arguments and n being the number of arguments. List creation always produces one result. The / operator is given the name null. It takes one argument, but both

221

undereferenced and dereferenced versions are needed by the operation. It produces either zero or one result. &regions is a keyword that produces three results. Type checking and type conversion constructs consist of an if-then construct, an if-then-else construct, a type_case construct that selects code to execute based on the type of an argument, and a len_case construct that selects code to execute based on the number of arguments in the variable part of a variable-length argument list. The conditions in the if-then and if-then-else constructs are composed of operations that check the types of arguments or attempt to convert arguments to specific types. A type check starts with 'is:'. This is followed by the name of a type and an argument in parentheses. For example, the then clause of the following if is executed if x is a list.
if is:list(x) then ...

A type conversion is similar to a type check, but starts with 'cnv:'. For example, the following code attempts to convert s to a string. If the conversion succeeds, the then clause of the if is executed.
if cnv:string(s) then ...

There are forms of conversion that convert a null value into a specified default value, forms that put a converted value in a location other than the parameter, and forms that convert Icon values into certain types of C values. The latter type of conversion is convenient because the detailed code is expressed in C. In addition, exposing conversions back and forth between Icon and C values leaves open the possibility of future optimizations to eliminate unnecessary conversions to Icon values. The control clause of an if may also use limited forms of expressions involving boolean operators. The full syntax and semantics of conversions are described in the implementation language reference manual. Detailed code is expressed in a slightly extended version of C and is introduced by one of two constructs. The first is
inline { extended C }

This indicates that it is reasonable for the Icon compiler to put the detailed code in line. The second construct is
body { extended C }

This indicates that the detailed code is too large to be put in line and should only appear as part of a C function in the run-time library. The person who codes the operation is free to decide which pieces of detailed code are suitable to in-lining and which are not. The decision is easily changed, so an operation can be fine tuned after viewing the code produced by the compiler. One extension to C is the ability to declare variables that are tended with respect to garbage collection. Another extension is the ability to use some constructs of the implementation language, such as type conversions, within the C code. An important extension is the inclusion of return, suspend, and fail statements that are analogous to their Icon counterparts. This extension, combined with the operation headers, makes the coding of run-time routines independent of the C calling conventions used in the compiler system. The return and suspend statements have forms that convert C values to Icon values, providing inverses to conversions in the type checking code of the implementation language. This mechanism is more than is necessary for those keywords that are simple constants. For keywords that are string, cset, integer, or real constants there is a special form of

222

definition. The Icon compiler treats keywords coded with these definitions as manifest constants. When future versions of the Icon compiler implement constant folding, that optimization will be automatically applied to these keywords.

17.4 Standard and Tailored Operation Implementations


For every operation that it translates, except keywords, rtt creates a C function conforming to the standard calling conventions of the compiler system. With the help of the C compiler and library maintenance routine, rtt puts an object module for that function in the compiler system's run-time library. This function is suitable for invocation through a procedure block. It is used with unoptimized invocations. rtt creates an entry in the data base for every operation it translates, including keywords. This entry contains the code for the operation. The code is stored in the data base in a form that is easier to parse and process than the original source, and the body statements are replaced by calls to C functions. These C functions are in the run-time library and implement the code from the body statement. The calling conventions for these functions are tailored to the needs of the code and do not, in general, conform to the standard calling conventions of the compiler system. When the compiler can determine that a particular operation is being invoked, it locates the operation in the data base and applies information from type inferencing to simplify or eliminate the code in the operation that performs type checking and conversions on arguments. These simplifications will eliminate detailed code that will never be executed in this invocation of the operation. The compiler can attempt the simplification because the type checking code is in the data base in a form that is easily dealt with. If enough simplification is possible, a tailored version of the operation is generated in line. This tailored version includes the simplified type checking code, if there is any left. For detailed code that has not been eliminated by the simplification, the tailored version also includes the C code from inline statements and includes calls to the functions that implement the code in body statements. The process of producing tailored versions of built-in operations is described in 
more detail in a later chapter. Ideally, when unique types can be inferred for the operands of an operation, the compiler should either produce a small piece of type-specific in-line C code or produce a call to a type-specific C function implementing the operation. It is possible to code operations in the implementation language such that the compiler can do this. In addition, this is the natural way to code most operations. For the few exceptions, there are reasonable compromises between ideal generated code and elegant coding in the implementation language. This demonstrates that the design and implementation of the run-time system and its communication with the compiler is successful.

223

Chapter 18: Organization of Iconc


The Icon compiler, iconc, takes as input the source code for an Icon program and, with the help of a C compiler and linker, produces an executable file. The source code may be contained in several files, but iconc does not support separate compilation. It processes an entire program at once. This requirement simplifies several of the analyses, while allowing them to compute more accurate information. Without the entire program being available, the effects of procedures in other files are unknown. In fact, it is not possible to distinguish built-in functions from missing procedures. Type inferencing would be particularly weakened. It would have to assume that any call to an undeclared variable could have any side effect on global variables (including procedure variables) and on any data structure reachable through global variables or parameters.

18.1 Compiler Phases


Iconc is organized into a number of phases. These are illustrated in the diagram on the following page. The initialization phase includes reading a data base of information about run-time routines into internal tables. This information is used in many of the other phases. The source analysis phase consists of a lexical analyzer and parser. These are adapted from those used in the interpreter system. The parser generates abstract syntax trees and symbol tables for all procedures before subsequent phases are invoked. The symbol resolution phase determines the scope of variables that are not declared in the procedures where they are used. This resolution can only be done completely after all source files for the program are read. If a variable does not have a local declaration, the compiler checks to see whether the variable is declared global (possibly as a procedure or record constructor) in one of the source files. If not, the compiler checks to see whether the variable name matches that of a built-in function. If the name is still not resolved, it is considered to be a local variable.

224

18.2 Naive Optimizations


Naive optimizations involve invocation and assignment. These optimizations are done before type inferencing to aid that analysis. Certain "debugging features" of Icon such as the variable function interfere with these optimizations. By default, these features are disabled. If the user of iconc requests the debugging features, these optimizations are bypassed. While these optimizations are being done, information is gathered about whether procedures suspend, return, or fail. This information is used in several places in the compiler. The invocation optimization replaces general invocation by a direct form of invocation to a procedure, a built-in function, or a record constructor. This optimization involves modifying nodes in the syntax tree. It only applies to invocations where the expression being invoked is a global variable initialized to a value in one of the three classes of procedure values. First, the Icon program is analyzed to determine which variables of this type appear only as the immediate operands of invocations. No such variable is ever assigned to, so it retains its initial value throughout the program (a more exact analysis could be done to determine the variables that are not assigned to, but this would seldom yield better results in real Icon programs because these programs seldom do anything with

225

procedure values other than invoke them). This means that all invocations of these variables can be replaced by direct invocations. In addition, the variables themselves can be discarded as they are no longer referenced. The invocation optimization improves the speed of type inferencing in two ways, although it does nothing to improve the accuracy of the information produced. Performing type inferencing on direct invocations is faster than performing it on general invocations. In addition, type inferencing has fewer variables to handle, which also speeds it up. The invocation optimization does improve code generated by the compiler. In theory, the optimization could be done better after type inferencing using the information from that analysis, but in practice this would seldom produce better results. On most real Icon programs, this optimization using the naive analysis replaces all general invocations with direct ones. As noted in Chapter 3, it is important for type inferencing to distinguish strong updates from weak updates. The data base contains a general description of assignment, but it would be difficult for a type inferencing system to use the description in recognizing that a simple assignment or an augmented assignment to a named variable is a strong update. It is much easier to change general assignments where the left hand side is a named variable to a special assignment and have type inferencing know that the special assignment is a strong update. Special-casing assignment to named variables is also important for code generation. General optimizations to run-time routines are not adequate to produce the desired code for these assignments. The optimizations to assignment are described in Chapter 22. The details of type inferencing are described in other chapters. Producing code for the C main function, global variables, constants, and record constructors is straightforward. C code is written to two files for organizational purposes; it allows definitions and 
code to be written in parallel.

18.3 Code Generation for Procedures


Producing code for procedures involves several sub-phases. The sub-phases are liveness analysis, basic code generation, fix-up and peephole optimization, and output. During this phase of code generation, procedures are processed one at a time. These sub-phases are described in later chapters. The code fix-up phase and peephole optimization are performed during the same pass over the internal representation of the C code. Some clean-up from peephole optimization is performed when the code is written. The logical organization of the compiler places the fix-up phase as a pass in code generation with peephole optimization being a separate phase. The organization of this dissertation reflects the logical organization of the compiler rather than its physical organization. The physical organization of this phase is shown in the following diagram.

226

227

Chapter 19: The Implementation of Type Inferencing


Chapter 15 develops a theoretical type inferencing model for Icon, called Model 3. The purpose of that chapter is to explain the relationship between type inferencing and the semantics of Icon; for simplicity, some features of the language along with certain questions of practical importance are ignored in that chapter. This chapter describes the implementation of the type inferencing system used in the Icon compiler. The implementation is based on Model 3. This chapter describes solutions to those issues that must be addressed in developing a complete practical type inferencing system from Model 3. The issues include:

- the representation of types and stores
- the development of a type system for the full Icon language
- the handling of procedure calls and co-expression activation
- the determination of edges in the flow graph
- the computation of a fixed point for type information

In addition, performance of the abstract interpretation must be considered. This includes both speed and memory usage.

19.1 The Representation of Types and Stores


A type consists of a set of basic types (technically, it is a union of basic types, but the constituents of the basic types are not explicitly represented). The operations needed for these sets are: add a basic type to a set, form the union of two sets, form the intersection of two sets, test for membership in a set, and generate members of a subrange of basic types (for example, generate all members that are list types). A bit vector is used for the set representation, with a basic type represented by an integer index into the vector. The required operations are simple and efficient to implement using this representation. Unless the sets are large and sparse, this representation is also space efficient. While the sets of types are often sparse, for typical programs, they are not large. A store is implemented as an array of pointers to types. A mapping is established from variable references to indexes in the store. In the type inferencing model, Model 3, presented in Chapter 3, there is one kind of store that contains all variables. In the actual implementation, temporary variables need not be kept in this store. The purpose of this store is to propagate a change to a variable to the program points affected by the change. A temporary variable is set in one place in the program and used in one place; there is nothing to determine dynamically. It is both effective and efficient to store the type of a temporary variable in the corresponding node of the syntax 
tree. Another level of abstraction can be developed that requires much less memory than Model 3, but it must be modified to produce good results. This abstraction abandons the practice of a store for every edge in the flow graph. Instead it has a single store that is a merger of all the stores in Model 3 (the type associated with a variable in a merged store is the union of the types obtained for that variable from each store being merged). For variables that are truly of one type throughout execution, this abstraction works well. Named variables

228

do not have this property. They have an initial null value and usually are assigned a value of another type during execution. Because assignments to named variables are treated as strong updates, Model 3 can often deduce that a variable does not contain the null type at specific points in the flow graph. For structure variables this further abstraction does work well in practice. These variables are initialized to the empty type (that is, no instances of these variables exist at the start of program execution), so the problem of the initial null type does not occur. Sometimes instances of these variables are created with the null type and later changed. However, the fact that assignments to these variables must be treated as weak updates means that type inferencing must assume that these variables can always retain their earlier type after an assignment. Propagating type information about structure variables through the syntax tree does not help much in these circumstances. While it is possible to construct example programs where Model 3 works better for structure variables than this further abstraction, experiments with prototype type inferencing systems indicate that the original system seldom gives better information for real programs [.tr88-25.]. Type inferencing in the compiler is implemented with two kinds of stores: local stores that are associated with edges in the flow graph and contain named variables (both local and global variables) and a global store that contains structure variables (in the implementation, the global store is actually broken up by structure-variable type into several arrays).

19.2 A Full Type System


Model 3 from Chapter 3 includes no structure type other than lists, nor does it consider how to handle types for procedure and co-expression values to allow effective type inferencing in their presence. This section develops a complete and effective type system for Icon. Most of the structure types of Icon are assigned several subtypes in the type inferencing system. As explained for lists in Chapter 3, these subtypes are associated with the program points where values of the type are created. The exception to this approach is records. One type is created per record declaration. While it is possible to assign a subtype to each use of a record constructor, in practice a given kind of record usually is used consistently with respect to the types of its fields throughout a program. The extra subtypes would require more storage while seldom improving the resulting type information. For efficiency, the size of the bit vectors representing types and the size of the stores need to remain fixed during abstract interpretation. This means that all subtypes must be determined as part of the initialization of the type inferencing system. In order to avoid excessive storage usage, it is important to avoid creating many subtypes for program points where structures are not created. The invocation optimization described in Chapter 6 helps determine where structures can and cannot be created by determining for most invocations what operation is used. The type inferencing system determines what structures an operation can create by examining the abstract type computations associated with the operation in the data base. A new clause in an abstract type computation indicates that a structure can be created. The following example is the abstract type computation from the built-in function list. It indicates that the function creates and returns a new list with elements whose type is the same as that of the parameter x (the second parameter).
abstract { return new list(type(x)) }

229

This is the clause as written by the programmer developing the run-time library; it is translated into an internal form for storage in the data base. Invocation optimizations may not identify the operation associated with some invocations. The initialization phase of type inferencing skips these invocations. Type inferencing may later discover that one of these invocations can create a structure. Each structure type is given one subtype that is used for all of these later discoveries. While it is possible for there to be several of these creation points representing logically distinct subtypes, this seldom occurs in practice. If it does happen, type inferencing produces a correct, but less precise, result. The type system contains representations for all run-time values that must be modeled in the abstract interpretation. These run-time values can be divided into three categories, each of which is a superset of the previous category:

- the first-class Icon values
- the intermediate values of expression evaluation
- the values used internally by Icon operations

Just as these categories appear in different places during the execution of an Icon program, the corresponding types appear in different places during abstract interpretation. If certain types cannot appear as the result of a particular type computation, it is not necessary to have elements in the bit vector produced by the computation to represent those types. It is particularly important to minimize the memory used for stores associated with edges of the flow graph (this is discussed more in the last section of this chapter). These stores contain only the types of the smallest set listed above: the first-class values. Types (or subtypes) are allocated bit vector indexes. The first-class types may appear as the result of any of the three classes of computation. They are allocated indexes at the front of the bit vectors. If they are the only types that can result from an abstract computation, the bit vector for the result has no elements beyond that of the last first-class type. The first-class types are:

- null
- string
- cset
- integer
- real
- file
- list subtypes
- set subtypes
- table subtypes
- record subtypes
- procedure subtypes
- co-expression subtypes

230

The list subtypes are allocated with


- one for the argument to the main procedure
- one for each easily recognized creation point
- one representing all other lists

The set subtypes are allocated with


- one for each easily recognized creation point
- one representing all other sets

The table subtypes are allocated with


- one for each easily recognized creation point
- one representing all other tables

The record subtypes are allocated with one for each record declaration. The procedure subtypes are allocated with

- one for each procedure
- one for each record constructor
- one for each built-in function
- one representing operators available for string invocation

Note that procedure subtypes are allocated after most procedure and function values have been eliminated by invocation optimizations (the procedures and functions are still there; they are just not Icon values). Therefore, few of these subtypes are actually allocated. The co-expression subtypes are allocated with

- one for the main co-expression
- one for each create expression

The bit vectors used to hold the intermediate results of performing abstract interpretation on expressions must be able to represent the basic types plus the variable reference types. Variable reference types are allocated bit vector indexes following those of the basic types. The bit vectors for intermediate results are just long enough to contain these two classifications of types. The variable reference types are

- integer keyword variable types
- &pos
- &subject
- substring trapped variable types
- table-element trapped variable types
- list-element reference types
- table assigned-value reference types
- field reference types

231

- global variable reference types
- local variable reference types

&random and &trace behave the same way from the perspective of the type inferencing system, so they are grouped under one type as integer keyword variables. The fact that &pos can cause assignment to fail is reflected in the type inferencing system, so it is given a separate type. &subject is the only string keyword variable so it is in a type by itself. Substring trapped variables and table-element trapped variables are "hidden" structures in the implementation of Icon. They appear as intermediate results, but are only indirectly observable in the semantics of Icon. In order to reflect these semantics in the type inferencing system, there are substring trapped variable types and table-element trapped variable types. These are structure types similar to sets, but are variable reference types rather than first-class types. The substring trapped variable types are allocated with

- one for each easily recognized creation point
- one representing all other substring trapped variables

The table-element trapped variable types are allocated with


- one for each easily recognized creation point
- one representing all other table-element trapped variables

List elements, table assigned-values, and record fields are all variables that can appear as the intermediate results of expression evaluation. The type system has corresponding variable reference types to represent them. The list-element reference types are allocated with one for each list type. The table assigned-value reference types are allocated with one for each table type. The field reference types are allocated with one for each record field declaration. Identifiers are variables and are reflected in the type system. The global variable reference types are allocated with

- one for each global variable (except those eliminated by invocation optimizations)
- one for each static variable

The local variable reference types are allocated with one for each local variable, but the bit vector indexes and store indexes are reused between procedures. The next section describes why this reuse is possible. Icon's operators use a number of internal values that never "escape" as intermediate results. Some of these internal values are reflected in the type system in order to describe the semantics of the operations in the abstract interpretation. The full set of types that can be used to express these semantics is presented in the syntax of the abstract type computations of the run-time implementation language; see Appendix A. In addition to the types of intermediate results, these types include

- set-element reference types
- table key reference types
- table default value reference types
- references to the fields within substring trapped variables that reference variables

232

- references to fields within table-element trapped variables that reference tables

These types are allocated bit vector indexes at the end of the type system. The only bit vectors large enough to contain them are the temporary bit vectors used during interpretation of the abstract type computations of built-in operations. Set elements, table keys, and table default values do not appear as variable references in the results of expression evaluation. However, it is necessary to refer to them in order to describe the effects of certain Icon operations. For this reason, they are included in the type system. The set-element reference types are allocated with one for each set type. The table key reference types are allocated with one for each table type. The table default value reference types are allocated with one for each table type. Substring trapped variable types contain references to the variable types being trapped and table-element trapped variable types contain references to the table types containing the element being trapped. These references are fields within these trapped variable types. There is one field reference type for each trapped variable type.

19.3 Procedure Calls and Co-Expression Activations


A type inferencing system for the full Icon language must handle procedures and co-expressions. As noted above, each procedure and each create expression is given its own type. This allows the type inferencing system to accurately determine which procedures are invoked and what co-expressions might be activated by a particular expression. The standard semantics for procedures and co-expressions can be implemented using stacks of procedure activation frames, with one stack per co-expression. The first frame, on every stack except that of the main co-expression, is a copy of the frame for the procedure that created the co-expression. The local variables in this frame are used for evaluating the code of the co-expression. The type inferencing system uses a trivial abstraction of these procedure frame stacks, while capturing the possible transfers of control induced by procedure calls and co-expression activations. The type inferencing system, in effect, uses an environment that has one frame per procedure, where that frame is a summary of all frames for the procedure that could occur in a corresponding environment of an implementation of the standard semantics. The frame is simply a portion of the store that contains local variables. Because no other procedure can alter a local variable, it is unnecessary to pass the types of local variables into procedure calls. If the called procedure returns control via a return, suspend, or fail, the types are unchanged, so they can be passed directly across the call. This allows the type inferencing system to keep only the local variables of a procedure in the stores on the edges of the flow graph for the procedure, rather than keeping the local variables of all procedures. Global variables must be passed into and out of procedure calls. Because static variables may be altered in recursive calls, they must also be passed into and out of procedure calls. A flow graph for an entire program is constructed from the flow graphs for its 
individual procedures and co-expressions. An edge is added from every invocation of a procedure to the head of that procedure and edges are added from every return, suspend, and fail back to the invocation. In addition, edges must be added from an invocation of a procedure to all the suspends in the procedure to represent resumption. When it is not possible to predetermine which procedure is being invoked, edges are effectively added from the invocation to all procedures whose invocation cannot be ruled out based on the naive

233

invocation optimizations. Edges are effectively added between all co-expressions and all activations, and between all activations. Information is propagated along an edge when type inferencing deduces that the corresponding procedure call or co-expression activation might actually occur. The representation of edges in the flow graph is discussed below. Type inferencing must reflect the initializations performed when a procedure is invoked. Local variables are initialized to the null value. On the first call to the procedure, static variables are also initialized to the null value. The initialization code for the standard semantics is similar to
initialize locals
if (first_call) {
   initialize statics
   user initialization code
   }

In type inferencing, the variables are initialized to the null type and the condition on the if is ignored. Type inferencing simply knows that the first-call code is executed sometimes and not others. Before entering the main procedure, global variables are set to the null type and all static variables are set to the empty type. In some sense, the empty type represents an impossible execution path. Type inferencing sees paths in the flow graph from the start of the program to the body of a procedure that never pass through the initialization code. However, static variables have an empty type along this path and no operation on them is valid. Invalid operations contribute nothing to type information.

19.4 The Flow Graph and Type Computations


In order to determine the types of variables at the points where they are used, type inferencing must compute the contents of the stores along edges in the flow graph. Permanently allocating the store on each edge can use a large amount of memory. The usage is
M = E (G + S + L) T / 8

where
M = total memory, expressed in bytes, used by stores
E = the number of edges in the program flow graph
G = the number of global variables in the program
S = the number of static variables in the program
L = the maximum number of local variables in any procedure
T = the number of types in the type system

Large programs with many structure creation points can produce thousands of edges, dozens of variables, and hundreds of types, requiring megabytes of memory to represent the stores. The code generation phase of the compiler just needs the (possibly dereferenced) types of operands, not the stores. If dereferenced types are kept at the expressions where they are needed, it is not necessary to keep a store with each edge of the flow graph. Consider a section of straight-line code with no backtracking. Abstract interpretation can be performed on the graph starting with the initial store at the initial node and proceeding in execution order. At each node, the store on the edge entering the node is used to dereference variables and to compute the next store if there are side effects. Once the computations at a node are done, the store on the edge entering the node is no longer needed. If updates are done carefully, they can be done in-place, so that the same memory can be used for both the store entering a node and the one leaving it.

234

In the case of branching control paths (as in a case expression), abstract interpretation must proceed along one path at a time. The store at the start of the branching of paths must be saved for use with each path. However, it need only be saved until the last path is interpreted. At that point, the memory for the store can be reused. When paths join, the stores along each path must be merged. The merging can be done as each path is completed; the store from the path can then be reused in interpreting other paths. When all paths have been interpreted, the merged store becomes the current store for the node at the join point. The start of a loop is a point where control paths join. Unlike abstract interpretation for the joining of simple branching paths, abstract interpretation for the joining of back edges is not just a matter of interpreting all paths leading to the join point before proceeding. The edge leaving the start of the loop is itself on a path leading to the start of the loop. Circular dependencies among stores are handled by repeatedly performing the abstract interpretation until a fixed point is reached. In this type inferencing system, abstract interpretation is performed in iterations, with each node in the flow graph visited once per iteration. The nodes are visited in execution order. For back edges, the store from the previous iteration is used in the interpretation. The stores on these edges must be kept throughout the interpretation. These stores are initialized to map all variables to the empty type. This represents the fact that the abstract interpretation has not yet proven that execution can reach the corresponding edge. The type inferencing system never explicitly represents the edges of the flow graph in a data structure. Icon is a structured programming language. Many edges are implicit in a tree walk performed in forward execution order -- the order in which type inferencing is performed. The location of back edges must be predetermined in 
order to allocate stores for them, but the edges themselves are effectively recomputed as part of the abstract interpretation. There are two kinds of back edges. The back edges caused by looping control structures can be trivially deduced from the syntax tree. A store for such an edge is kept in the node for the control structure. Other back edges are induced by goal-directed evaluation. These edges are determined with the same techniques used in liveness analysis. A store for such an edge is kept in the node of the suspending operation that forms the start of the loop. Because the node can be the start of several nested loops, this store is actually the merged store for the stores that theoretically exist on each back edge. At any point in abstract interpretation, three stores are of interest. The current store is the store entering the node on which abstract interpretation is currently being performed. It is created by merging the stores on the incoming edges. The success store is the store representing the state of computations when the operation succeeds. It is usually created by modifying the current store. The failure store is the store representing the state of computations when the operation fails. In the presence of a suspended operation, the failure store is the store kept at the node for that operation. A new failure store is established whenever a resumable operation is encountered. This works because abstract interpretation is performed in forward execution order and resumption is LIFO. Control structures, such as if-then-else, with branching and joining paths of execution, cause difficulties because there may be more than one possible suspended operation when execution leaves the control structure. This results in more than one failure store during abstract interpretation. Rather than keeping multiple failure stores when such a control structure has operations that suspend on multiple paths, type inferencing pretends that the control structure ends with 
an operation that does nothing

235

other than suspend and then fail. It allocates a store for this operation in the node for the control structure. When later operations that fail are encountered, this store is updated. The failure of this imaginary operation is the only failure seen by paths created by the control structure and the information needed to update the failure stores for these paths is that in the store for this imaginary operation. This works because the imaginary operation just passes along failure without modifying the store. In the case where a control structure transforms failure into forward execution, as in the first subexpression of a compound expression, the failure store is allocated (with empty types) when the control structure is encountered and deallocated when it is no longer needed. If no failure can occur, no failure store need be allocated. The lack of possible failure is noted while the location of back edges is being computed during the initialization of type inferencing. Because a failure store may be updated at several operations that can fail, these are weak updates. Typically, a failure store is updated by merging the current store into it. The interprocedural flow graph described earlier in this chapter has edges between invocations and returns, suspends, and fails. Type inferencing does not maintain separate stores for these theoretical edges. Instead it maintains three stores per procedure that are mergers of stores on several edges. One store is the merger of all stores entering the procedure because of invocation; this store contains parameter types in addition to the types of global and static variables. Another store is the merger of all stores entering the procedure because of resumption. The third store is the merger of all stores leaving the procedure because of returns, suspends, and fails. There is also a result type associated with the procedure. It is updated when abstract interpretation encounters returns and suspends. Two stores are associated with each 
co-expression. One is the merger of all stores entering the co-expression and the other is the merger of all stores leaving the co-expression. Execution can not only leave through an activation operator, it can also re-enter through the activation. The store entering the activation is a merger of the stores entering all co-expressions in which the activation can occur. Because a procedure containing an activation may be called from several co-expressions, it is necessary to keep track of those co-expressions. A set of co-expressions is associated with each procedure for this purpose. Each co-expression also contains a type for the values transmitted to it. The result type of an activation includes the result types for all co-expressions that might be activated and the types of all values that can be transmitted to a co-expression that the activation might be executed in. When type inferencing encounters the invocation of a built-in operation, it performs abstract interpretation on the representation of the operation in the data base. It interprets the type-checking code to see what paths might be taken through the operation. The interpretation uses the abstract type computations and ignores the detailed C code when determining the side effects and result type of the operation. Because the code at this level of detail contains no loops, it is not necessary to save stores internal to operations. An operation is re-interpreted at each invocation. This allows type inferencing to produce good results for polymorphous operations. At this level, the code for an operation is simple enough that the cost of re-interpretation is not prohibitive. All side effects within these operations are treated as weak updates; the only strong updates recognized by type inferencing are the optimized assignments to named variables (see Chapter 6). The abstract semantics of control structures are hard-coded within the type inferencing system. The system combines all the elements described in 
this chapter to perform the

236

abstract interpretation. A global flag is set any time an update changes type information that is used in the next iteration of abstract interpretation. The flag is cleared between iterations. If the flag is not set during an iteration, a fixed point has been reached and the interpretation halts.

237

Chapter 20: Code Generation


This chapter describes the code generation process. The examples of generated code presented here are produced by the compiler, but some cosmetic changes have been made to enhance readability. An Icon procedure is, in general, translated into several C functions. There is always an outer function for the procedure. This is the function that is seen as implementing the procedure. In addition to the outer function, there may be several functions for success continuations that are used to implement generative expressions. The outer function of a procedure must have features that support the semantics of an Icon call, just as a function implementing a run-time operation does. In general, a procedure must have a procedure block at run time. This procedure block references the outer function. All functions referenced through a procedure block must conform to the compiler system's standard calling conventions. However, invocation optimizations usually eliminate the need for procedure variables and their associated procedure blocks. When this happens, the calling conventions for the outer function can be tailored to the needs of the procedure. As explained in Chapter 2, the standard calling convention requires four parameters: the number of arguments, a pointer to the beginning of an array of descriptors holding the arguments, a pointer to a result location, and a success continuation to use for suspension. The function itself is responsible for dereferencing and argument list adjustment. In a tailored calling convention for an outer function of a procedure, any dereferencing and argument list adjustment is done at the call site. This includes creating an Icon list for the end of a variable-sized argument list. The compiler produces code to do this that is optimized to the particular call. An example of an optimization is eliminating dereferencing when type inferencing determines that an argument cannot be a variable reference. The number of arguments is never needed in these tailored calling conventions because the number is fixed for the procedure. Arguments are 
still passed via a pointer to an array of descriptors, but if there are no arguments, no pointer is needed. If the procedure returns no value, no result location is needed. If the procedure does not suspend, no success continuation is needed. In addition to providing a calling interface for the rest of the program, the outer function must provide local variables for use by the code generated for the procedure. These variables, along with several other items, are located in a procedure frame. An Icon procedure frame is implemented as a C structure embedded within the frame of its outer C function (that is, as a local struct definition). Code within the outer function can access the procedure frame directly. However, continuations must use a pointer to the frame. A global C variable, pfp, points to the frame of the currently executing procedure. For efficiency, continuations load this pointer into a local register variable. The frame for a main procedure might have the following declaration.
struct PF00_main { struct p_frame *old_pfp; dptr old_argp; dptr rslt; continuation succ_cont; struct { struct tend_desc *previous; int num; struct descrip d[5];

238

} tend; };

with the definition


struct PF00_main frame;

in the procedure's outer function. A procedure frame always contains the following five items: a pointer to the frame of the caller, a pointer to the argument list of the caller, a pointer to the result location of this call, a pointer to the success continuation of this call, and an array of tended descriptors for this procedure. It may also contain C integer variables, C double variables, and string and cset buffers for use in converting values. If debugging is enabled, additional information is stored in the frame. The structure p_frame is a generic procedure frame containing a single tended descriptor. It is used to define the pointer old_pfp because the caller can be any procedure. The argument pointer, result location, and success continuation of the call must be available to the success continuations of the procedure. A global C variable, argp, points to the argument list for the current call. This current argument list pointer could have been put in the procedure frame, but it is desirable to have quick access to it. Quick access to the result location and the success continuation of the call is less important, so they are accessed indirectly through the procedure frame. The array of descriptors is linked onto the chain used by the garbage collector to locate tended descriptors. These descriptors are used for Icon variables local to the procedure and for temporary variables that hold intermediate results. If the function is responsible for dereferencing and argument list adjustment (that is, if it does not have a tailored calling convention), the modified argument list is constructed in a section of these descriptors. The final thing provided by the outer function is a control environment in which code generation starts. In particular, it provides the bounding environment for the body of the procedure and the implicit failure at the end of the procedure. The following C function is the tailored outer function for a procedure named p. The procedure has arguments 
and returns a result. However, it does not suspend, so it needs no success continuation.
static int P01_p(args, rslt) dptr args; dptr rslt; { struct PF01_p frame; register int signal; int i; frame.old_pfp = pfp; pfp = (struct p_frame *)&frame; frame.old_argp = argp; frame.rslt = rslt; frame.succ_cont = NULL; for (i = 0; i < 3; ++i) frame.tend.d[i].dword = D_Null; argp = args; frame.tend.num = 3; frame.tend.previous = tend; tend = (struct tend_desc *)&frame.tend; translation of the body of procedure p

239

L10: /* bound */ L4: /* proc fail */ tend = frame.tend.previous; pfp = frame.old_pfp; argp = frame.old_argp; return A_Resume; L8: /* proc return */ tend = frame.tend.previous; pfp = frame.old_pfp; argp = frame.old_argp; return A_Continue; }

The initialization code reflects the fact that this function has three tended descriptors to use for local variables and intermediate results. L10 is both the bounding label and the failure label for the body of the procedure. Code to handle procedure failure and return (except for setting the result value) is at the end of the outer function. As with bounding labels, the labels for these pieces of code have associated signals. If a procedure fail or return occurs in a success continuation, the continuation returns the corresponding signal which is propagated to the outer function where it is converted into a goto. The code for procedure failure is located after the body of the procedure, automatically implementing the implicit failure at the end of the procedure.

20.1 Translating Icon Expressions


Icon's goal-directed evaluation makes the implementation of control flow an important issue during code generation. Code for an expression is generated while walking the expression's syntax tree in forward execution order. During code generation there is always a current failure action. This action is either "branch to a label" or "return a signal". When the translation of a procedure starts, the failure action is to branch to the bounding label of the procedure body. The action is changed when generators are encountered or while control structures that use failure are being translated. The allocation of temporary variables to intermediate results is discussed in more detail later. However, some aspects of it will be addressed before presenting examples of generated code. The result location of a subexpression may be determined when the parent operation is encountered on the way down the syntax tree. This is usually a temporary variable, but does not have to be. If no location has been assigned by the time the code generator needs to use it, a temporary variable is allocated for it. This temporary variable is used in the code for the parent operation. The code generation process is illustrated below with examples that use a number of control structures and operations. Code generation for other features of the language is similar. Consider the process of translating the following Icon expression:
return if a = (1 | 2) then "yes" else "no"

When this expression is encountered, there is some current failure action, perhaps a branch to a bounding label. The return expression produces no value, so whether a result location has been assigned to it is of no consequence. If the argument of a return fails, the procedure fails. To handle this possibility, the current failure action is set to branch to the label for procedure failure before translating the argument (in this example, that action is not used). The code for the argument is then generated with its result location set to the result location of the procedure itself. Finally the result location is dereferenced and

240

control is transferred to the procedure return label. The dereferencing function, deref, takes two arguments: a pointer to a source descriptor and a pointer to a destination descriptor.
code for the if expression deref(rslt, rslt); goto L7 /* proc return */;

The control clause of the if expression must be bounded. The code implementing the then clause must be generated following the bounding label for the control clause. A label must also be set up for the else clause with a branch to this label used as the failure action for the control clause. Note that the result location of each branch is the result location of the if expression which is in turn the result location of the procedure. Because neither branch of the if expression contains operations that suspend, the two control paths can be brought together with a branch to a label.
code for control clause L4: /* bound */ rslt->vword.sptr = "yes"; rslt->dword = 3; goto L6 /* end if */; L5: /* else */ rslt->vword.sptr = "no"; rslt->dword = 2; L6: /* end if */

Using a branch and a label to bring together the two control paths of the if expression is an optimization. If the then or the else clauses contain operations that suspend, the general continuation model must be used. In this model, the code following the if expression is put in a success continuation, which is then called at the end of both the code for the then clause and the code for the else clause. Next consider the translation of the control clause. The numeric comparison operator takes two operands. In this translation, the standard calling conventions are used for the library routine implementing the operator. Therefore, the operands must be in an array of descriptors. This array is allocated as a sub-array of the tended descriptors for the procedure. In this example, tended location 0 is occupied by the local variable, a. Tended locations 1 and 2 are free to be allocated as the arguments to the comparison operator. The code for the first operand simply builds a variable reference.
frame.tend.d[1].dword = D_Var; frame.tend.d[1].vword.descptr = &frame.tend.d[0] /* a */;

However, the second operand is alternation. This is a generator and requires a success continuation. In this example, the continuation is given the name P02_main (the Icon expression is part of the main procedure). The continuation contains the invocation of the run-time function implementing the comparison operator and the end of the bounded expression for the control clause of the if. The function O0o_numeq implements the comparison operator. The if expression discards the operator's result. This is accomplished by using the variable trashcan as the result location for the call. The compiler knows that this operation does not suspend, so it passes a null continuation to the function. The end of the bounded expression consists of a transfer of control to the bounding label. This is accomplished by returning a signal. The continuation is
static int P02_main() { register struct PF00_main *rpfp;

241

rpfp = (struct PF00_main *)pfp; switch (O0o_numeq(2, &(rpfp->tend.d[1]), &trashcan, (continuation)NULL)) { case A_Continue: break; case A_Resume: return A_Resume; } return 4; /* bound */ }

Each alternative of the alternation must compute the value of its subexpression and call the success continuation. The failure action for the first alternative is to branch to the second alternative. The failure action of the second alternative is the failure action of the entire alternation expression. In this example, the failure action is to branch to the else label of the if expression. In each alternative, a bounding signal from the continuation must be converted into a branch to the bounding label. Note that this bounding signal indicates that the control expression succeeded.
frame.tend.d[2].dword = D_Integer; frame.tend.d[2].vword.integr = 1; switch (P02_main()) { case A_Resume: goto L2 /* alt */; case 4 /* bound */: goto L4 /* bound */; } L2: /* alt */ frame.tend.d[2].dword = D_Integer; frame.tend.d[2].vword.integr = 2; switch (P02_main()) { case A_Resume: goto L5 /* else */; case 4 /* bound */: goto L4 /* bound */; }

The code for the entire return expression is obtained by putting together all the pieces. The result is the following code (the code for P02_main is not repeated).
frame.tend.d[1].dword = D_Var; frame.tend.d[1].vword.descptr = &frame.tend.d[0] /* a */; frame.tend.d[2].dword = D_Integer; frame.tend.d[2].vword.integr = 1; switch (P02_main()) { case A_Resume: goto L2 /* alt */; case 4 /* bound */: goto L4 /* bound */; } L2: /* alt */ frame.tend.d[2].dword = D_Integer; frame.tend.d[2].vword.integr = 2; switch (P02_main()) { case A_Resume: goto L5 /* else */; case 4 /* bound */:

242

goto L4 /* bound */; } L4: /* bound */ rslt->vword.sptr = "yes"; rslt->dword = 3; goto L6 /* end if */; L5: /* else */ rslt->vword.sptr = "no"; rslt->dword = 2; L6: /* end if */ deref(rslt, rslt); goto L7 /* proc return */;

20.2 Signal Handling


In order to produce signal handling code, the code generator must know what signals may be returned from a call. These signals may be either directly produced by the operation (or procedure) being called or they may originate from a success continuation. Note that either the operation or the continuation may be missing from a call, but not both. The signals produced directly by an operation are A_Resume, A_Continue, and A_FallThru (this last signal is only used internally within in-line code). The signals produced by a success continuation belong to one of three categories: A_Resume, signals corresponding to labels within the procedure the continuation belongs to, and signals corresponding to labels in procedures farther down in the call chain. The last category only occurs when the procedure suspends. The success continuation for the procedure call may return a signal belonging to the calling procedure. This is demonstrated in the following example (the generated code has been "cleaned-up" a little to make it easier to follow). The Icon program being translated is
procedure main() write(p()) end procedure p() suspend 1 to 10 end

The generative procedure p is called in a bounded context. The code generated for the call is
switch (P01_p(&frame.tend.d[0], P05_main)) { case 7 /* bound */: goto L7 /* bound */; case A_Resume: goto L7 /* bound */; } L7: /* bound */

This call uses the following success continuation. The continuation writes the result of the call to p then signals the end of the bounded expression.
static int P05_main() { register struct PF00_main *rpfp; rpfp = (struct PF00_main *)pfp; F0c_write(1, &rpfp->tend.d[0], &trashcan, (continuation)NULL);

243

return 7; /* bound */ }

The to operator in procedure p needs a success continuation that implements procedure suspension. Suspension is implemented by switching to the old procedure frame pointer and old argument pointer, then calling the success continuation for the call to p. The success continuation is accessed with the expression rpfp->succ_cont. In this example, the continuation will only be the function P05_main. The suspend must check the signal returned by the procedure call's success continuation. However, the code generator does not try to determine exactly what signals might be returned by a continuation belonging to another procedure. Such a continuation may return an A_Resume signal or a signal belonging to some procedure farther down in the call chain. In this example, bounding signal 7 will be returned and it belongs to main. If the call's success continuation returns A_Resume, the procedure frame pointer and argument pointer must be restored, and the current failure action must be executed. In this case, that action is to return an A_Resume signal to the to operator. If the call's success continuation returns any other signal, that signal must be propagated back through the procedure call. The following function is the success continuation for the to operator.
static int P03_p() { register int signal; register struct PF01_p *rpfp; rpfp = (struct PF01_p *)pfp; deref(rpfp->rslt, rpfp->rslt); pfp = rpfp->old_pfp; argp = rpfp->old_argp; signal = (*rpfp->succ_cont)(); if (signal != A_Resume) { return signal; } pfp = (struct p_frame *)rpfp; argp = NULL; return A_Resume; }

The following code implements the call to the to operator. The signal handling code associated with the call must pass along any signal from the procedure call's success continuation. These signals are recognized by the fact that the procedure frame for the calling procedure is still in effect. At this point, the signal is propagated out of the procedure p. Because the procedure frame is about to be removed from the C stack, the descriptors it contains must be removed from the tended list.
frame.tend.d[0].dword = D_Integer; frame.tend.d[0].vword.integr = 1; frame.tend.d[1].dword = D_Integer; frame.tend.d[1].vword.integr = 10; signal = O0k_to(2, &frame.tend.d[0], rslt, P03_p); if (pfp != (struct p_frame *)&frame) { tend = frame.tend.previous; return signal; } switch (signal) { case A_Resume:

244

goto L2 /* bound */; } L2: /* bound */

So far, this discussion has not addressed the question of how the code generator determines what signals might be returned from a call. Because code is generated in execution order, a call involving a success continuation is generated before the code in the continuation is generated. This makes it difficult to know what signals might originate from the success continuation. This problem exists for direct calls to a success continuation and for calls to an operation that uses a success continuation. The problem is solved by doing code generation in two parts. The first part produces incomplete signal handling code. At this time, code to handle the signals produced directly by an operation is generated. The second part of code generation is a fix-up pass that completes the signal handling code by determining what signals might be produced by success continuations. The code generator constructs a call graph of the continuations for a procedure. Some of these calls are indirect calls to a continuation through an operation. However, the only effect of an operation on signals returned by a continuation is to intercept A_Resume signals. All other signals are just passed along. This is true even if the operation is a procedure. This call graph of continuations does not contain the procedure call graph nor does it contain continuations from other procedures. Forward execution order imposes a partial order on continuations. A continuation only calls continuations strictly greater in forward execution order than itself. Therefore the continuation call graph is a DAG. The fix-up pass is done with a bottom-up walk of the continuation call DAG. This pass determines what signals are returned by each continuation in the DAG. While processing a continuation, the fix-up pass examines each continuation call in that continuation. At the point it processes a call, it has determined what signals might be returned by the called continuation. It uses this information to complete the signal handling code associated with the call and to determine what signals might be passed along to continuations higher up the DAG. If a continuation contains code for a suspend, the fix-up pass notes that the continuation may return a foreign signal belonging to another procedure call. As explained above, foreign signals are handled by special code that checks the procedure frame pointer.

20.3 Temporary Variable Allocation


The code generator uses the liveness information for an intermediate value when allocating a temporary variable to hold the value. As explained in Chapter 4, this information consists of the furthest program point, represented as a node in the syntax tree, through which the intermediate value must be retained. When a temporary variable is allocated to a value, that variable is placed on a deallocation list associated with the node beyond which its value is not needed. When the code generator passes a node, all the temporary variables on the node's deallocation list are deallocated. The code generator maintains a status array for temporary variables while it is processing a procedure. The array contains one element per temporary variable. This array is expandable, allowing a procedure to use an arbitrary number of temporary variables. In a simple allocation scheme, the status of a temporary variable is either free or in-use. The entry for a temporary variable is initially marked free; it is marked in-use when the variable is allocated, and it is marked free again when the variable is deallocated.

245

The simple scheme works well when temporary variables are allocated independently. It does not work well when arrays of contiguous temporary variables are allocated. This occurs when temporary variables are allocated to the arguments of a procedure invocation or any invocation conforming to the standard calling conventions; under these circumstances, the argument list is implemented as an array. All of the contiguous temporary variables must be reserved before the first one is used, even though many operations may be performed before the last one is needed. Rather than mark a temporary variable in-use before it actually is, the compiler uses the program point where the temporary variable will be used to mark the temporary variable's entry in the status array as reserved. A contiguous array of temporary variables is marked reserved at the same time, with each variable having a different reservation point. A reserved temporary variable may be allocated to other intermediate values as long as it will be deallocated before the reservation point. In this scheme, an entry in a deallocation list must include the previous status of the temporary variable as it might be a reserved status. The compiler allocates a contiguous subarray of temporary variables for the arguments of an invocation when it encounters the invocation on the way down the syntax tree during its tree walk. It uses a first-fit algorithm to find a large enough subarray that does not have a conflicting allocation. Consider the problem of allocating temporary variables to the expression
f1(f2(f3(x, f4())), y)

where f1 can fail and f4 is a generator. The syntax tree for this expression is shown below. Note that invocation nodes show the operation as part of the node label and not as the first operand to general invocation. This reflects the direct invocation optimization that is usually performed on invocations. Each node in the graph is given a numeric label. These labels increase in value in forward execution order.

The following figure shows the operations in forward execution order with lines on the left side of the diagram showing the lifetime of intermediate values. This represents the output of the liveness analysis phase of the compiler. Because f4 can be resumed by f1, the value of the expression x has a lifetime that extends to the invocation of f1. The extended portion of the lifetime is indicated with a dotted line.

24

The following series of diagrams illustrate the process of allocating intermediate values. Each diagram includes an annotated syntax tree and a status array for temporary variables. An arrow in the tree shows the current location of the tree walk. A deallocation list is located near the upper right of each node. An element in the list consists of a temporary variable number and the status with which to restore the variable's entry in the status array. If a temporary variable has been allocated to an intermediate value, the variable's number appears near the lower right of the corresponding node. The status array is shown with four elements. The elements are initialized to 3 which indicates that the temporary variables are free. A reserved temporary variable is indicated by placing the node number of the reservation point in the corresponding element. When a temporary variable is actually in use, the corresponding element is set to 5. Temporary variables are reserved while walking down the syntax tree. The tree illustrated below on the left shows the state of allocation after temporary variables have been allocated for the operands of f1. Two contiguous variables are needed. All variables are free, so the first-fit algorithm allocates variables 0 and 1. The status array is updated to indicate that these variables are reserved for nodes 4 and 5 respectively, and the nodes are annotated with these variable numbers. The lifetime information in the previous figure indicates that these variables should be deallocated after f1 is executed, so the deallocation array for node 6 is updated. The next step is the allocation of a temporary variable to the operand of f2. The intermediate value has a lifetime extending from node 3 to node 4. This conflicts with the allocation of variable 0, but not the allocation of variable 1. Therefore, variable 1 is allocated to node 3 and the deallocation list for node 4 is updated. This is illustrated in the tree on the right:

24!

The final allocation requires a contiguous pair of variables for nodes 1 and 2. The value from node 1 has a lifetime that extends to node 6, and the value from node 2 has a lifetime that extends to node 3. The current allocations for variables 0 and 1 conflict with the lifetime of the intermediate value of node 1, so the variables 2 and 3 are used in this allocation. This is illustrated in the tree:

The remaining actions of the allocator in this example mark temporary variables in-use when the code generator uses them and restore previous allocated statuses when temporary variables are deallocated. This is done in the six steps illustrated in the following diagram. The annotations on the graph do not change. Only the node of interest is shown for each step. These steps are performed in node-number order.

24"

In general, the tree walk will alternate up and down the syntax tree. For example, if node 5 had children, the allocation status after the deallocation associated with node 4,

is used to allocate temporary variables to those children. If this requires more than four temporary variables, the status array is extended with elements initialized to 3. This allocation algorithm is not guaranteed to produce an allocation that uses a minimal number of temporary variables. Indeed, a smaller allocation for the previous example is illustrated in the tree:

While the non-optimality of this algorithm is unlikely to have a measurable effect on the performance of any practical program, the problem of finding an efficient optimal solution is of theoretical interest. Classical results in the area of register allocation do not apply. It is possible to allocate a minimum number of registers from expression trees for conventional languages in polynomial time [.dragon.]. The algorithm to do this depends on

24#

the fact that registers (temporary variables) are dead as soon as the value they contain is used. This is not true for Icon temporary variables. The result of Prabhala and Sethi stating that register allocation is NP-complete even in the presence of an infinite supply of registers also does not apply [.prabhala subexp.]. Their complexity result derives from performing register allocation in the presence of common subexpression elimination (that is, from performing register allocation on expression DAGs rather than trees) on a 2-address-instruction machine with optimality measured as the minimum number of instructions needed to implement the program. Goal-directed evaluation imposes more structure on lifetimes than common subexpression elimination, the machine model used here is the C language, and optimality is being measured as the minimum number of temporary variables needed. The Icon temporary variable allocation problem is different from the Prolog variable allocation problem. Prolog uses explicit variables whose lifetimes can have arbitrary overlaps even in the absence of goal-directed evaluation. The Prolog allocation problem is equivalent to the classical graph coloring problem which is NP-complete [.debray apr91, dragon.]. If the allocation of a subarray of temporary variables is delayed until the first one is actually needed in the generated code, an optimum allocation results for the preceding example. It is not obvious whether this is true for the general case of expression trees employing goal-directed evaluation. This problem is left for future work. In addition to holding intermediate values, temporary variables are used as local tended variables within in-line code. This affects the pattern of allocations, but not the underlying allocation technique.

25&

Chapter 21: Control Flow Optimizations


21.1 Naive Code Generation
Naive code generation does not consider the effects and needs of the immediately surrounding program. The result is often a poor use of the target language. Even sophisticated code generation schemes that consider the effects of relatively large pieces of the program still produce poor code at the boundaries between the pieces. This problem is typically solved by adding a peephole optimizer to the compiler to improve the generated code [.peep1,Wulf,Tanenbaum peephole,dragon.]. A peephole optimizer looks at several instructions that are adjacent (in terms of execution) and tries to replace the instructions by better, usually fewer, instructions. It typically analyzes a variety of properties of the instructions such as addressing modes and control flow. The Icon compiler has a peephole optimizer that works on the internal form of the generated C code and deals only with control flow. The previous examples of generated code contain a number of instances of code where control flow can be handled much better. For example, it is possible to entirely eliminate the following code fragment generated for the example explaining procedure suspension.
switch (signal) { case A_Resume: goto L2 /* bound */; } L2: /* bound */

This code is produced because the code generator does not take into account the fact that the bounding label happens to immediately follow the test.

21.2 Success Continuations


For the C code in the preceding example, it is quite possible that a C compiler would produce machine code that its own peephole optimizer could eliminate. However, it is unlikely that a C compiler would optimize naively generated success continuations. An earlier example of code generation produced the continuation:
static int P02_main() { register struct PF00_main *rpfp; rpfp = (struct PF00_main *)pfp; switch (O0o_numeq(2, &(rpfp->tend.d[1]), &trashcan, (continuation)NULL)) { case A_Continue: break; case A_Resume: return A_Resume; } return 4; /* bound */ }

If the statement
return 4; /* bound */

251

is brought into the switch statement, replacing the break, then P02_main consists of a simple operation call (a C call with associated signal handling code). This operation call is
switch (O0o_numeq(2, &(rpfp->tend.d[1]), &trashcan, (continuation)NULL)) { case A_Continue: return 4; /* bound */ case A_Resume: return A_Resume; }

P02_main is called directly in two places in the following code.


frame.tend.d[2].dword = D_Integer; frame.tend.d[2].vword.integr = 1; switch (P02_main()) { case A_Resume: goto L2 /* alt */; case 4 /* bound */: goto L4 /* bound */; } L2: /* alt */ frame.tend.d[2].dword = D_Integer; frame.tend.d[2].vword.integr = 2; switch (P02_main()) { case A_Resume: goto L5 /* else */; case 4 /* bound */: goto L4 /* bound */; } L4: /* bound */

A direct call to a trivial function can reasonably be replaced by the body of that function. When this is done for a continuation, it is necessary to compose the signal handling code of the body of a continuation with that of the call. This is accomplished by replacing each return statement in the body with the action in the call corresponding to the signal returned. The following table illustrates the signal handling composition for the first call in the code. The resulting code checks the signal from O0o_numeq and performs the final action.

| signal from O0o_numeq | signal from P02_main | final action |
| A_Continue            | 4                    | goto L4;     |
| A_Resume              | A_Resume             | goto L2;     |

The result of in-lining P02_main is
frame.tend.d[2].dword = D_Integer; frame.tend.d[2].vword.integr = 1; switch (O0o_numeq(2, &frame.tend.d[1], &trashcan, (continuation)NULL)) { case A_Continue: goto L4 /* bound */; case A_Resume: goto L2 /* alt */; } L2: /* alt */ frame.tend.d[2].dword = D_Integer;

252

frame.tend.d[2].vword.integr = 2; switch (O0o_numeq(2, &frame.tend.d[1], &trashcan, (continuation)NULL)) { case A_Continue: goto L4 /* bound */; case A_Resume: goto L5 /* else */; } L4: /* bound */

With a little more manipulation, the switch statements can be converted into if statements and the label L2 can be eliminated:
frame.tend.d[2].dword = D_Integer; frame.tend.d[2].vword.integr = 1; if (O0o_numeq(2, &frame.tend.d[1], &trashcan, (continuation)NULL) == A_Continue) goto L4 /* bound */; frame.tend.d[2].dword = D_Integer; frame.tend.d[2].vword.integr = 2; if (O0o_numeq(2, &frame.tend.d[1], &trashcan, (continuation)NULL) == A_Resume) goto L5 /* else */; L4: /* bound */

The Icon compiler's peephole optimizer recognizes two kinds of trivial continuations. The kind illustrated in the previous example consists of a single call with associated signal handling. The other kind simply consists of a single return-signal statement. As in the above example, continuations do not usually meet this definition of triviality until control flow optimizations are performed on them. For this reason, the Icon compiler's peephole optimizer must perform some optimizations that could otherwise be left to the C compiler.

21.3 Iconc's Peephole Optimizer


The peephole optimizer performs the following optimizations:

- elimination of unreachable code
- elimination of gotos immediately preceding their destinations
- collapse of branch chains
- deletion of unused labels
- collapse of trivial call chains (that is, in-lining trivial continuations)
- deletion of unused continuations
- simplification of signal checking

Unreachable code follows a goto or a return, and it continues to the first referenced label or to the end of the function. This optimization may eliminate code that returns signals, thereby reducing the number of signals that must be handled by a continuation call. This provides another reason for performing this traditional optimization in the Icon compiler rather than letting the C compiler do it. This code is eliminated when the fix-up pass for signal handling is being performed. gotos immediately preceding their labels also are eliminated at this time.

253

Unused labels usually are eliminated when the code is written out, but they may be deleted as part of a segment of unreachable code. Unused continuations are simply not written out. A branch chain is formed when the destination of a goto is another goto or a return. A break in a switch statement is treated as a goto. There may be several gotos in a chain. Each goto is replaced by the goto or return at the end of the chain. This may leave some labels unreferenced and may leave some of the intermediate gotos unreachable. Branch chains are collapsed during the fix-up pass. Inter-function optimization is not traditionally considered a peephole optimization. This is because human beings seldom write trivial functions and most code generators do not produce continuations. The Icon compiler, however, uses calls to success continuations as freely as it uses gotos. Therefore collapsing trivial call chains is as important as collapsing branch chains. There are two kinds of calls to trivial continuations: direct calls and indirect calls through an operation. A direct call always can be replaced by the body of the continuation using signal handling code that is a composition of that in the continuation and that of the call. If the continuation consists of just a return statement, this means that the call is replaced by the action associated with the returned signal: either another return statement or a goto statement. For continuations consisting of a call, the composition is more complicated, as demonstrated by the example given earlier in this chapter. In the case of an indirect call through an operation, the continuation cannot be placed in line. However, there is an optimization that can be applied. Under some circumstances, the compiler produces a continuation that simply calls another continuation. For example, this occurs when compiling the Icon expression
every write(!x | end)

The compiler allocates a continuation for the alternation, then compiles the expression !x. The element generation operator suspends, so the compiler allocates a continuation for it and code generation proceeds in this continuation. However, the end of the first alternative has been reached so the only code for this continuation is a call to the continuation for the alternation. The continuation for the alternation contains the code for the invocation of write and for the end of the every control structure. The code for the first alternative is
frame.tend.d[2].dword = D_Var; frame.tend.d[2].vword.descptr = &frame.tend.d[0] /* x */; switch (O0e_bang(1, &frame.tend.d[2], &frame.tend.d[1], P02_main)) { case A_Resume: goto L1 /* alt */; } L1: /* alt */

The code for the two continuations is


static int P02_main() { switch (P03_main()) { case A_Resume: return A_Resume; } } static int P03_main()

254

{ register struct PF00_main *rpfp; rpfp = (struct PF00_main *)pfp; F0c_write(1, &rpfp->tend.d[1], &trashcan, (continuation)NULL); return A_Resume; }

The call to O0e_bang can be optimized by passing the continuation P03_main in place of P02_main. The optimizations that collapse trivial call chains are performed during the fix-up pass for signal handling. The final peephole optimization involves simplifying the signal handling code associated with a call. In general, signals are handled with a switch statement containing a case clause for each signal. The C compiler does not know that these signals are the only values that are ever tested by the switch statement, nor is the C compiler likely to notice that some cases simply pass along to the next function down in the call chain the signal that was received. The Icon compiler can use this information to optimize the signal handling beyond the level to which the C compiler is able to optimize it. The optimizer may replace the general form of the switch statement with a switch statement utilizing a default clause or with an if statement. In some cases, the optimizer completely eliminates signal checking. This optimization is done when the code is written.

255

Chapter 22: Optimizing Invocations


Several optimizations apply to the invocation of procedures and built-in operations. These include optimizations resulting from the application of information from type inferencing, optimizations resulting from the application of lifetime information to passing parameters and returning results, and optimizations involving the generation of in-line code. There are interactions between the optimizations in these three categories. A primary motivation in developing the Icon compiler was to explore the optimizations that are possible using information from type inferencing. These optimizations involve eliminating type checking and type conversions where type inferencing indicates that they are not needed. Dereferencing is not normally viewed as a type conversion, because variable references are not first-class values in Icon. However, variable references occur as intermediate values and do appear in the type system used by the Icon compiler. Therefore, from the perspective of the compiler, dereferencing is a type conversion. When a procedure or built-in operation is implemented as a C function conforming to the standard calling conventions of the compiler system, that function is responsible for performing any type checking and type conversions needed by the procedure or operation. For this reason, the checking and conversions can only be eliminated from tailored implementations.

22.1 Invocation of Procedures


As explained earlier, a procedure has one implementation: either a standard implementation or a tailored implementation. If the compiler decides to produce a tailored implementation, the caller of the procedure is responsible for dereferencing. When type inferencing determines that an operand is not a variable reference, no dereferencing code is generated. Suppose p is a procedure that takes one argument and always fails. If P01_p is the tailored C function implementing p, then it takes one argument: a pointer to a descriptor containing the dereferenced Icon argument. Without using type information, the call p(3) translates into
frame.tend.d[0].dword = D_Integer; frame.tend.d[0].vword.integr = 3; deref(&frame.tend.d[0], &frame.tend.d[0]); P01_p(&frame.tend.d[0]);

With the use of type information, the call to deref is eliminated:


frame.tend.d[0].dword = D_Integer; frame.tend.d[0].vword.integr = 3; P01_p(&frame.tend.d[0]);

22.2 Invocation and In-lining of Built-in Operations


Icon's built-in operations present more opportunities for these optimizations than procedures, because they can contain type checking and conversions beyond dereferencing. Built-in operations are treated differently than procedures. Except for keywords, there is always a C function in the run-time library that implements the operation using the standard calling conventions. In addition, the compiler can create several tailored in-line versions of an operation from the information in the data base.

25

It is important to keep in mind that there are two levels of in-lining. An in-line version of an operation always involves the type checking and conversions of the operation (although they may be optimized away during the tailoring process). However, detailed code is placed in-line only if it is specified with an inline statement in the run-time system. If the detailed code is specified with a body statement, the "in-line" code is a function call to a routine in the run-time library. The difference can be seen by comparing the code produced by compiling the expression ~x to that produced by compiling the expression /x. The definition in the run-time implementation language of cset complement is
operator{1} ~ compl(x) if !cnv:tmp_cset(x) then runerr(104, x) abstract { return cset } body { ... } end

The conversion to tmp_cset is a conversion to a cset that does not use space in the block region. Instead the cset is constructed in a temporary local buffer. The data base entry for the operation indicates that the argument must be dereferenced. The entry has a C translation of the type conversion code with a call to the support routine, cnv_tcset, to do the actual conversion. cnv_tcset takes three arguments: a buffer, a source descriptor, and a destination descriptor. The entry in the data base has a call to the function O160_compl in place of the body statement. This function takes as arguments the argument and the result location of the operation. The code generator ignores the abstract clause. The in-line code for ~x is
frame.tend.d[3].dword = D_Var; frame.tend.d[3].vword.descptr = &frame.tend.d[0] /* x */; deref(&frame.tend.d[3], &frame.tend.d[3]); if (cnv_tcset(&(frame.cbuf[0]), &(frame.tend.d[3]), &(frame.tend.d[3]))) goto L1 /* then: compl */; err_msg(104, &(frame.tend.d[3])); L1: /* then: compl */ O160_compl(&(frame.tend.d[3]) , &frame.tend.d[2]);

The following is the definition of the '/' operator. Note that both undereferenced and dereferenced versions of the argument are used.
operator{0,1} / null(underef x -> dx) abstract { return type(x) } if is:null(dx) then inline { return x; } else inline { fail; } end

In this operation, all detailed code is specified with inline statements. The generated code for /x follows. Note that the order of the then and else clauses is reversed to simplify the test. L3 is the failure label of the expression. The return is implemented as an assignment

25!

to the result location, frame.tend.d[2], with execution falling off the end of the in-line code.
frame.tend.d[3].dword = D_Var; frame.tend.d[3].vword.descptr = &frame.tend.d[0] /* x */; deref(&frame.tend.d[3], &frame.tend.d[4]); if (frame.tend.d[4].dword == D_Null) goto L2 /* then: null */; goto L3 /* bound */; L2: /* then: null */ frame.tend.d[2] = frame.tend.d[3];

If type inferencing determines a unique type for x in each of these expressions, the type checking is eliminated from the code. Suppose type inferencing determines that x can only be of type cset in the expression a := ~x. If parameter passing and assignment optimizations (these are explained below) are combined with the elimination of type checking, the resulting code is
O160_compl(&(frame.tend.d[0] /* x */), &frame.tend.d[1] /* a */);

The form of this translated code meets the goals of the compiler design for the invocation of a complicated operation: a simple call to a type-specific C function with minimum parameter passing. The implementation language for run-time operations requires that type conversions be specified in the control clause of an if statement. However, some conversions, such as converting a string to a cset, are guaranteed to succeed. If the code generator recognizes one of these conversions, it eliminates the if statement. The only code generated is the conversion and the code to be executed when the conversion succeeds. Suppose type inferencing determines that x in the preceding example can only be a string. Then the generated code for the example is
frame.tend.d[2] = frame.tend.d[0] /* x */; cnv_tcset(&(frame.cbuf[0]), &(frame.tend.d[2]), &(frame.tend.d[2])); O160_compl(&(frame.tend.d[2]) , &frame.tend.d[1] /* a */);

22.3 Heuristic for Deciding to In-line


The in-line code for the operators shown so far in the section is relatively small. However, the untailored in-line code for operations like the element generation operator, !, is large. If tailoring the code does not produce a large reduction in size, it is better to generate a call to the C function in the run-time library that uses the standard calling conventions. A heuristic is needed for deciding when to use in-line code and when to call the standard C function. A simple heuristic is to use in-line code only when all type checking and conversions can be eliminated. However, this precludes the generation of in-lining code in some important situations. The operator / is used to direct control flow. It should always be used with an operand whose type can vary at run time, and the generated code should always be inlined. Consider the Icon expression
if /x then x := ""

The compiler applies parameter-passing optimizations to the sub-expression /x. It also eliminates the return value of the operator, because the value is discarded. An implementation convention for operations allows the compiler to discard the expression

25"

that computes the return value. The convention requires that a return expression of an operation not contain user-visible side effects (storage allocation is an exception to the rule; it is visible, but the language makes no guarantees as to when it will occur). The code for /x is reduced to a simple type check. The code generated for the if expression is
if ((frame.tend.d[0] /* x */).dword == D_Null) goto L2 /* bound */; goto L3 /* bound */; L2: /* bound */ frame.tend.d[0] /* x */.vword.sptr = ""; frame.tend.d[0] /* x */.dword = 0; L3: /* bound */

To accommodate expressions like those in the preceding example, the heuristic used in the compiler is to produce tailored in-line code when that code contains no more than one type check. Only conversions retaining their if statements are counted as type checks. This simple heuristic produces reasonable code. Future work includes examining more sophisticated heuristics.

22.4 In-lining Success Continuations


Suspension in in-line code provides further opportunity for optimization. In general, suspension is implemented as a call to a success continuation. However, if there is only one call to the continuation, it is better not to put the code in a continuation. The code should be generated at the site of the suspension. Consider the expression
every p(1 to 10)

The implementation of the to operator is


operator{} ... to(from, to) /* * arguments must be integers. */ if !cnv:C_integer(from) then runerr(101, from) if !cnv:C_integer(to) then runerr(101, to) abstract { return integer } inline { for ( ; from <= to; ++from) { suspend C_integer from; } fail; } end

The arguments are known to be integers, so the tailored version consists of just the code in the inline statement. The for statement is converted to gotos and conditional gotos, so the control flow optimizer can handle it (this conversion is done by rtt before putting the code in the data base). The suspend is translated into code to set the result value and a failure label used for the code of the rest of the bounded expression. This code is generated before the label and consists of a call to the procedure p and the failure introduced by the every expression. The generated code follows. The failure for the every expression is translated into goto L4, where L4 is the failure label introduced by the suspend. The

25#

control flow optimizer removes both the goto and the label. They are retained here to elucidate the code generation process.
frame.tend.d[1].dword = D_Integer; frame.tend.d[1].vword.integr = 1; frame.tend.d[2].dword = D_Integer; frame.tend.d[2].vword.integr = 10; L1: /* within: to */ if (!(frame.tend.d[1].vword.integr <= frame.tend.d[2].vword.integr) ) goto L2 /* bound */; frame.tend.d[0].vword.integr = frame.tend.d[1].vword.integr; frame.tend.d[0].dword = D_Integer; P01_p(&frame.tend.d[0]); goto L4 /* end suspend: to */; L4: /* end suspend: to */ ++frame.tend.d[1].vword.integr; goto L1 /* within: to */; L2: /* bound */

This is an example of a generator within an every expression being converted into an inline loop. Except for the fact that descriptors are being used instead of C integers, this is nearly as good as the C code
for (i = 1; i <= 10; ++i) p(i);

22.5 Parameter Passing Optimizations


As mentioned above, parameter-passing optimizations are used to improve the generated code. These optimizations involve eliminating unneeded argument computations and eliminating unnecessary copying. These optimizations are applied to tailored in-line code. They must take into account how a parameter is used and whether the corresponding argument value has an extended lifetime. In some situations, a parameter is not used in the tailored code. There are two common circumstances in which this happens. One is for the first operand of conjunction. The other occurs with a polymorphous operation that has a type-specific optional parameter. If a different type is being operated on, the optional parameter is not referenced in the tailored code. If a tailored operation has an unreferenced parameter and the invocation has a corresponding argument expression, the compiler notes that the expression result is discarded. Earlier in this chapter there are examples of optimizations possible when expression results are discarded. If the corresponding argument is missing, the compiler refrains from supplying a null value for it. Consider the invocation
insert(x, 3)

insert takes three arguments. If x is a table, the third argument is used as the entry value and must be supplied in the generated code. In the following generated code, the default value for the third argument is computed into frame.tend.d[2].dword:
frame.tend.d[1].dword = D_Integer; frame.tend.d[1].vword.integr = 3; frame.tend.d[2].dword = D_Null; frame.tend.d[3] = frame.tend.d[0] /* x */;

2 &

F1o0_insert(&(frame.tend.d[2]), &(frame.tend.d[1]), &(frame.tend.d[3]), &trashcan);

Because F1o0_insert uses a tailored calling convention, its arguments can be in a different order from those of the Icon function. It appears that the argument expression x is computed in the wrong place in the execution order. However, this is not true; the expression is not computed at all. If it were, the result would be a variable reference. Instead, the assignment of the value in x to the temporary variable is a form of optimized dereferencing. Therefore, it must be done as part of the operation, not as part of the argument computations. This is explained below. If the value of x in this expression is a set instead of a table, the entry value is not used. This is illustrated by the following code. Note that a different C function is called for a set than for a table; this is because a different body statement is selected.
frame.tend.d[1].dword = D_Integer; frame.tend.d[1].vword.integr = 3; frame.tend.d[2] = frame.tend.d[0] /* x */; F1o1_insert(&(frame.tend.d[1]) , &(frame.tend.d[2]) , &trashcan);

In general, an operation must copy its argument to a new descriptor before using it. This is because an operation is allowed to modify the argument. Modification of the original argument location is not safe in the presence of goal-directed evaluation. The operation could be re-executed without recomputing the argument. Therefore, the original value must be available. This is demonstrated with the following expression.
every p(0 to (1 to 3))

This is a double loop. The outer to expression is the inner loop, while the inner to expression is the outer loop. to modifies its first argument while counting. However, the first argument to the outer to has an extended lifetime due to the fact that the second argument is a generator. Therefore, this to operator must make a copy of its first argument. The generated code for this every expression is
frame.tend.d[2].dword = D_Integer; frame.tend.d[2].vword.integr = 0; frame.tend.d[4].dword = D_Integer; frame.tend.d[4].vword.integr = 1; frame.tend.d[5].dword = D_Integer; frame.tend.d[5].vword.integr = 3; L1: /* within: to */ if (!(frame.tend.d[4].vword.integr <= frame.tend.d[5].vword.integr)) goto L2 /* bound */; frame.tend.d[3].vword.integr = frame.tend.d[4].vword.integr; frame.tend.d[3].dword = D_Integer; frame.tend.d[6] = frame.tend.d[2]; L3: /* within: to */ if (!(frame.tend.d[6].vword.integr <= frame.tend.d[3].vword.integr)) goto L4 /* end suspend: to */; frame.tend.d[1].vword.integr = frame.tend.d[6].vword.integr; frame.tend.d[1].dword = D_Integer; P01_p(&frame.tend.d[1]);

2 1

++frame.tend.d[6].vword.integr; goto L3 /* within: to */; L4: /* end suspend: to */ ++frame.tend.d[4].vword.integr; goto L1 /* within: to */; L2: /* bound */

The first argument to the outer to is copied with the statement
frame.tend.d[6] = frame.tend.d[2];

The copying of the other arguments has been eliminated because of two observations: the second argument of to is never modified and the first argument of the inner to (outer loop) is never reused without being recomputed. This second fact is determined while the lifetime information is being calculated. There is no generator occurring between the computation of the argument and the execution of the operator. Even if there were, it would only necessitate copying if the generator could be resumed after the operator started executing. As noted above, another set of optimizations involves dereferencing named variables. If an operation needs only the dereferenced value of an argument and type inferencing determines that the argument is a specific named variable (recall that each named variable is given a distinct variable reference type), the code generator does not need to generate code to compute the variable reference, because it knows what it is. That is, it does not need the value of the argument. If the argument is a simple identifier, no code at all is generated for the argument. As shown in the code presented above for
insert(x, 3)

dereferencing can be implemented as simple assignment rather than a call to the deref function:
frame.tend.d[3] = frame.tend.d[0] /* x */;

In fact, unless certain conditions interfere, the variable can be used directly as the argument descriptor and no copying is needed. This is reflected in the code generated in a previous example:
if /x then ...

x is used directly in the in-line code for /:


if ((frame.tend.d[0] /* x */).dword == D_Null) goto L2 /* bound */;

This optimization cannot be performed if the operation modifies the argument, nor can it be performed if the variable's value might change while the operation is executing. Performing the optimization in the presence of the second condition would violate the semantics of argument dereferencing. The compiler does two simple tests to determine if the second condition might be true. If the operation has a side effect, the compiler assumes that the side effect might involve the named variable. Side effects are explicitly coded in the abstract type computations of the operation. The second test is to see if the argument has an extended lifetime. The compiler assumes that the variable might be changed by another operation during the extended lifetime (that is, while the operation is suspended).

22.6 Assignment Optimizations


The final set of invocation optimizations involves assignments to named variables. These include simple assignment and augmented assignments. Optimizing these assignments is

2 2

important and optimizations are possible beyond those that can easily be done working from the definition in the data base; assignments to named variables are treated as special cases. The optimizations are divided into the cases where the right-hand-side might produce a variable reference and those where it produces a simple Icon value. There are two cases when the right-hand-side of the assignment evaluates to a variable reference. If the right-hand-side is a named variable, a dereferencing optimization can be used. Consider
s := s1

This Icon expression is translated into


frame.tend.d[0] /* s */ = frame.tend.d[1] /* s1 */;

This is the ideal translation of this expression. For other situations, the deref function must be used. For example, the expression
s := ?x

is translated into
if (O0f2_random(&(frame.tend.d[0] /* x */), &frame.tend.d[2]) == A_Resume) goto L1 /* bound */; deref(&frame.tend.d[2], &frame.tend.d[1] /* s */);

When the right-hand-side computes to a simple Icon value, the named variable on the left-hand-side can often be used directly as the result location of the operation. This occurs in the earlier example
a := ~x

which translates into


O160_compl(&(frame.tend.d[0] /* x */), &frame.tend.d[1] /* a */);

This optimization is safe as long as setting the result location is the last thing the operation does. If the operation uses the result location as a work area and the variable were used as the result location, the operation might see the premature change to the variable. In this case, a separate result location must be allocated and the Icon assignment implemented as a C assignment. String concatenation is an example of an operation that uses its result location as a work area. The expression
s := s1 || s

is translated into
if (StrLoc(frame.tend.d[1] /* s1 */) + StrLen(frame.tend.d[1] /* s1 */) == strfree ) goto L1 /* within: cater */; StrLoc(frame.tend.d[2]) = alcstr(StrLoc(frame.tend.d[1] /* s1 */), StrLen(frame.tend.d[1] /* s1 */)); StrLen(frame.tend.d[2]) = StrLen(frame.tend.d[1] /* s1 */); goto L2 /* within: cater */; L1: /* within: cater */ frame.tend.d[2] = frame.tend.d[1] /* s1 */; L2: /* within: cater */ alcstr(StrLoc(frame.tend.d[0] /* s */), StrLen(frame.tend.d[0] /* s */));

2 3

StrLen(frame.tend.d[2]) = StrLen(frame.tend.d[1] /* s1 */) + StrLen(frame.tend.d[0] /* s */); frame.tend.d[0] /* s */ = frame.tend.d[2];

frame.tend.d[2] is the result location. If frame.tend.d[0] (the variable s) were used instead, the code would be wrong. There are still some optimizations falling under the category covered by this chapter to be explored as future work. For example, as shown earlier,
a := ~x

is translated into
frame.tend.d[2] = frame.tend.d[0] /* x */; cnv_tcset(&(frame.cbuf[0]), &(frame.tend.d[2]), &(frame.tend.d[2])); O160_compl(&(frame.tend.d[2]) , &frame.tend.d[1] /* a */);

when x is a string. The assignment to frame.tend.d[2] can be combined with the conversion to produce the code
cnv_tcset(&(frame.cbuf[0]), &(frame.tend.d[0] /* x */), &(frame.tend.d[2])); O160_compl(&(frame.tend.d[2]) , &frame.tend.d[1] /* a */);

There is, of course, always room for improvement in code generation for specific cases. However, the optimizations in this chapter combine to produce good code for most expressions. This is reflected in the performance data presented in Chapter 23.

2 4

Chapter 23: Performance of Compiled Code


The performance of compiled code is affected by the various optimizations performed by the compiler. This chapter demonstrates the effects of these optimizations on the execution speed of Icon expressions. It also presents speed improvements and memory usage for compiled code versus interpreted code for a set of complete Icon programs. All timing results used in this chapter were obtained on a Sun 4/490 and are the average of the results from three runs.

23.1 Expression Optimizations


The effects of four categories of optimization are demonstrated. These are assignment optimizations, invocation optimizations, control flow optimizations, and optimizations using information from type inferencing. Expression timings for the first three categories were made using techniques described in the August 1990 issue of The Icon Analyst [.ianl1.]. The following program skeleton is used to construct the programs to perform these timings.
procedure main() local x, start, overhead, iters iters := 1000000 start := &time every 1 to iters do { } overhead := &time - start x := 0 start := &time every 1 to iters do { expression to be timed (may use x) } write(&time - start - overhead) end

The timings are performed both with and without the desired optimizations, and the results are compared by computing the ratio of the time without optimization to the time with optimization. The assignment optimizations are described in Chapter 10. The effect of the assignment optimizations on the expression
x := &null

is measured using the program outlined above. The analysis that produces the assignment optimization is disabled by enabling debugging features in the generated code. The only other effect this has on the assignment expression is to insert code to update the line number of the expression being executed. In this test, the line number code is removed before the C code is compiled, ensuring that the assignment optimization is the only thing measured. The timing results for this test produce

Assignment Test: Time in Milliseconds Averaged over Three Runs

Unoptimized   Optimized   Ratio
1122          478         2.3

2 5

The tests were performed with type inferencing enabled. Therefore, even the "unoptimized" version of the assignment has the standard operation optimizations applied to it. This test demonstrates the importance of performing the special-case assignment optimizations. The next category of optimization measured is invocation optimization. This results in the direct invocation of the C functions implementing operations, or in some cases results in the operations being generated in line. The execution time for the expression
tab(0)

is measured with and without invocation optimizations. As with the assignment optimizations, these optimizations are disabled by enabling debugging features. Once again the line number code is removed before the C code is compiled. These optimizations interact with the optimizations that use information from type inferencing. The measurements are made with type inferencing disabled. Therefore, no type checking simplifications are performed. Without the invocation optimizations, the generated code consists of an indirect invocation through the global variable tab. With the invocation optimizations, the generated code consists of type checking/conversion code for the argument to tab and a call to the function implementing the body statement of tab. The timing results for tab(0) produce

Invocation Test: Time in Milliseconds Averaged over Three Runs

Unoptimized   Optimized   Ratio
8394          4321        1.9

The third category of optimization is control flow optimization. As explained in Chapter 9, these optimizations only perform improvements that a C compiler will not perform when the code contains trivial call chains. One situation that produces trivial call chains is nested alternation. The execution time for the expression
every x := ixor(x, 1 | 2 | 3 | 4 | 5)

is measured with and without control flow optimizations. The timing results for this every loop produce

Control Flow Test: Time in Milliseconds Averaged over Three Runs

Unoptimized   Optimized   Ratio
6384          4184        1.5

The final category of optimization results from type inferencing. The speed improvements result from generating operations in line, eliminating type checking, and generating success continuations in line. Use of the to operation is a good example of where these optimizations can be applied. This is demonstrated by measuring the speed of an every loop using the to operation. The program that performs the measurement is
procedure main() local x, start start := &time every x := 1 to 5000000 write(&time - start) end

The timing results for this program produce

Type Inference Test: Time in Milliseconds Averaged over Three Runs

Unoptimized   Optimized   Ratio
9233          2721        3.3

Another approach to determining the effectiveness of type inferencing is to measure how small a set it deduces for the possible types of operands to operations. This indicates whether future work should concentrate on improving type inferencing itself or simply concentrate on using type information more effectively in code generation. A simple measure is used here: the percentage of operands for which type inferencing deduces a unique Icon type. Measurements are made for operands of all operators, except optimized assignment, and for operands of all built-in functions appearing in optimized invocations. For the most part, these are the operations where the code generator can use type information. Measurements were made for a set of 14 programs (described below). Unique operand types within each program range from 63 percent to 100 percent of all operands, with an overall figure for the test suite of 80 percent (this is a straight unweighted figure obtained by considering all operands in the test suite without regard to what program they belong to). Even a perfect type inferencing system will not deduce unique types for 100 percent of all operands, because not all operands have unique types. This suggests that an improved type inferencing system may benefit some programs, but will have only a small overall impact. Future work should give priority to making better use of the type information rather than to increasing the accuracy of type inferencing.

23.2 Program Execution Speed


It has been demonstrated that the compiler optimizations are effective at improving the kinds of expressions they are directed toward. The question remains: How fast is compiled code (with and without optimizations) for complete programs as compared to interpreted code for the same programs? For some expressions, optimizations may interact to create significant cumulative speed improvements. For example, the fully optimized code for the every loop in the previous example is 30 times faster than the interpreted code; the improvement of 3.3 from type inferencing contributes one factor in the total improvement. Other expressions may spend so much time in the run-time system (which is unaffected by compiler optimizations) that no measurable improvements are produced. A set of 14 programs was selected mostly from contributions to the Icon program library [.tr90-7.] for testing the performance of the compiler. These programs were selected to represent a variety of applications and programming styles (an additional requirement is that they run long enough to obtain good timing results). The following table shows the speed improvements for the compiled code as compared to interpreted code. The compiler and interpreter used for the measurements both implement Version 8 of Icon. The execution time used to compute the speed improvements is the cpu time measured using the Bourne shell's time command. The first column in the table shows the execution time under the interpreter. The second column is for compiled code with debugging features enabled and optimizations disabled. This code is still better than what would be obtained by just removing the interpreter loop, because intelligent code generation is performed, especially for bounded expressions, and keywords are generated in line. The third column is for code with debugging features disabled and full optimization enabled.

2 !

Execution Time in Seconds Averaged over Three Runs

Program    Interpreter  Compiler Unoptimized  Compiler Optimized
cksol      49.9         33.5 (1.48)           22.5 (2.21)
concord    31.1         18.5 (1.68)            9.8 (3.17)
iidecode   60.3         34.0 (1.77)           12.9 (4.67)
iiencode   50.4         34.4 (1.46)           10.5 (4.80)
impress    44.6         24.8 (1.79)           14.0 (3.18)
list       43.1         24.5 (1.75)           13.6 (3.16)
memfiltr   60.8         34.3 (1.77)           15.3 (3.97)
mf         30.1         18.7 (1.60)           14.7 (2.04)
pssplit    64.0         39.0 (1.64)           26.6 (2.40)
roffcmds   32.9         18.1 (1.81)           12.0 (2.74)
sentence   34.3         23.9 (1.43)           16.2 (2.11)
spandex    36.8         23.3 (1.57)           14.7 (2.50)
textcnt    36.2         18.4 (1.96)            9.9 (3.65)
wrapper    27.3         15.9 (1.71)            9.4 (2.90)

The numbers in parentheses are speed-up factors obtained by dividing the interpreter execution time by the execution time of compiled code.

23.3 Code Size


One advantage the compiler has over the interpreter is that, unless a program is compiled with full string invocation enabled, the executable code for a program need not include the full run-time system. For systems with limited memory, this can be a significant advantage. The sizes of executable code presented here are obtained from file sizes. All executable files have had debugging information stripped from them. The size of the executable code in the interpreter system is taken to be the size of the interpreter (278,528 bytes) plus the size of the icode for the program being measured (under Unix systems, the size of the executable header, 12,800 bytes for the Sun 4, is subtracted from the size of the icode file, because it is not present during interpretation). Measurements for the 14 test programs are:

Program Sizes in Bytes

Program    Interpreter  Compiler  Ratio
cksol      282,153      81,920    0.29
concord    284,416      90,112    0.31
iidecode   285,525      98,304    0.34
iiencode   283,567      81,920    0.28
impress    295,656      114,688   0.38

2 "

Program    Interpreter  Compiler  Ratio
list       287,376      98,304    0.34
memfiltr   296,082      114,688   0.38
mf         282,739      81,920    0.28
pssplit    279,709      73,728    0.26
roffcmds   280,797      81,920    0.29
sentence   283,249      81,920    0.28
spandex    281,843      81,920    0.29
textcnt    280,397      73,728    0.26
wrapper    279,780      73,728    0.26

Other factors create differences in memory usage between the interpreter and compiled code. For example, the interpreter allocates a stack for expression evaluation. On the Sun 4, this stack is 40,000 bytes. The compiler, on the other hand, allocates work areas on a per-procedure basis and only allocates the maximum needed at any execution point within the procedure.

2 #

Chapter 24: Future Work on the Compiler


24.1 Summary
The underlying ideas used in type inferencing, liveness analysis, and temporary variable allocation were explored using prototype systems before work was started on the compiler described in this dissertation. The fundamental reasons for creating the compiler were to prove that these ideas could be incorporated into a complete and practical compiler for Icon, to explore optimizations that are possible using the information from type inferencing, and to determine how well those optimizations perform. The goal of proving the usefulness of ideas continues a long tradition in the Icon language project and in the SNOBOL language project before it. The prototype type inferencing system demonstrates that a naive implementation uses too much memory; implementation techniques were developed for the compiler to greatly reduce this memory usage. As the design and implementation of the compiler progressed, other problems presented themselves, both large and small, and solutions were developed to solve them. These problems include how to elegantly produce code either with or without type checking, how to generate good code for simple assignments (a very important kind of expression in most Icon programs), how to generate code that uses the continuation-passing techniques chosen for the compilation model, and how to perform peephole optimizations in the presence of success continuations. This dissertation describes the problems addressed by the Icon compiler and why they are important to the compiler, along with innovative solutions. It presents a complete set of techniques used to implement the optimizing compiler. Performance measurements demonstrate the improvements brought about by the various optimizations. They also demonstrate that, for most programs, compiled code runs much faster than interpreted code. Previous work has shown that simply eliminating the interpreter loop is not enough to produce large performance improvements [.tr88-31.]. Therefore, the measurements show that the set of techniques, in addition to being complete, is also effective.

24.2 Future Work


The Icon compiler builds upon and adds to a large body of work done previously by the Icon project. There are many problems and ideas relating to the implementation of Icon that remain to be explored in the future. Several are presented in earlier chapters. Others are described in the following list.

The quality of type inferencing can be improved. For example, if


x ||| y

is successfully executed, both x and y must contain lists. The current version of type inferencing in the compiler does not use this information; it updates the store based on result types and side effects, but not based on the argument types that must exist for successful execution without run-time error termination. Another improvement is to extend the type system to include constants and thereby perform constant propagation automatically as part of type inferencing. The type system can also be extended to distinguish between values created in allocated storage and those that are constant and do not reside in allocated storage. A descriptor that

2!&

never contains values from allocated storage does not need to be reachable by garbage collection.

In spite of large improvements in the storage requirements of type inferencing over the prototype system, this analysis requires large amounts of memory for some programs. A suggestion by John Kececioglu [.johnk.] is to explore the use of applicative data structures that share structure with their predecessors. Type inferencing provides information about values that do not need run-time type information associated with them. In the case of integers and reals, this information along with information from the data base about run-time operations can be used to perform computations on pure C values and to demote Icon descriptor variables to simple C integer and double variables. The current compiler makes little use of these opportunities for optimization. Numerous other optimizations using the information from type inferencing are possible beyond what is currently being done. One of them is to choose the representation of a data structure based on how the data structure is used. Translating constant expressions involving integer and real values into the corresponding C expressions would allow the C compiler to perform constant folding on them. For other Icon types, constant folding must be performed by the Icon compiler. This is particularly important for csets, but is not presently being done. O'Bagy's prototype compiler performs two kinds of control flow optimizations. It eliminates unnecessary bounding and demotes generators that cannot be resumed. The code generation techniques used in this compiler combined with the peephole optimizer automatically eliminate unnecessary bounding. The peephole optimizer also automatically demotes generators that are placed in-line. Enhancements to the peephole optimizer could effect the demotion of generators that are not placed in-line. The compiler uses a simple heuristic to decide when to use the in-line version of an operation and when to call the function implementing the operation using the standard calling conventions. More sophisticated heuristics should be explored. Temporary variables can retain pointers into allocated storage beyond the time that those pointers are needed. This reduces the effectiveness of garbage collection. Because garbage collection does not know which temporary variables are active and which are not, it retains all values pointed to by temporary variables. This problem can be solved by assigning the null value to temporary variables that are no longer active. However, this incurs significant overhead. The trade-off between assigning null values and the reduced effectiveness of garbage collection should be explored. The Icon compiler generates C code. If it generated assembly language code, it could make use of machine registers for state variables, such as the procedure frame pointer, and for holding intermediate results. This should result in a significant improvement in performance (at the cost of a less portable compiler and one that must deal with low-level details of code generation). Several of the analyses in the compiler rely on having the entire Icon program available. Separate compilation is very useful, but raises problems. One possible solution is to change the analyses to account for incomplete information. They

2!1

could assume that undeclared variables can be either local or global and possibly initialized to a built-in function or unknown procedures, and that calls to unknown operations can fail, or return or suspend any value and perform any side effect on any globally accessible variable. This would significantly reduce the effectiveness of some analyses. Another approach is to do incremental analyses, storing partial or tentative results in a data base. This is a much harder approach, but can produce results as good as compiling the program at one time.

Enhancements to the compiler can be complemented with improvements to the run-time system. One area that can use further exploration is storage management.

2!2

2!3

Chapter 25: Optimizing the Icon Compiler


This chapter details a set of optimizations that were made to the Icon compiler by Anthony Jones in 1996. Several optimizations are implemented in the type inferencing system and the intermediate code generation with the goals of improving execution time of the generated executable and lowering memory requirements.

25.1 Introduction
Compiler optimization is a difficult but exciting subject. There are a wide variety of ways a compiler could be optimized. There are also different levels that optimizations may be performed on. For example, one level of optimization deals with the front end and intermediate code generation. Some examples of these optimizations include common subexpression elimination, copy propagation, dead-code elimination, constant folding, loop unrolling, and strength reduction. Another level of optimization is machine specific, which might include efficient use of register assignments, using platform specific instructions that offer greater performance, or doing peephole transformations [ASU86]. However, the optimizations proposed for the Icon compiler are not platform specific because of the way the Icon compiler generates code. The Icon compiler's intermediate code is actually C. This means that Iconc translates Icon code into C code and then calls the native C compiler to finish the job. Another way a compiler can be optimized is by improving the performance of the compiler itself and not the generated code. These optimizations include improving memory usage or making internal data structures more efficient. The optimizations proposed for the Icon compiler deal exclusively with the front end and intermediate code stages of compilation and improving the performance of the compiler itself. Specifically, one of the main motivations behind this project was to make the compiler more effective by improving the memory usage for the type inferencing system because the Icon compiler was running out of memory compiling medium-large length programs. The next concern was the intermediate code generation. An examination of the intermediate code provided many areas of improvement. Some of the optimizations possible were eliminating redundant Icon system calls, replacing Icon literals with C literals, and eliminating unnecessary logic in variable initialization blocks.

Areas Where Iconc Can Be Improved

The advantage of iconc's compiled code is that it is many times faster than interpreted code. Unfortunately, Iconc contains some major problems that prevent the compiler from being widely used. The next few sections describe the components of the existing compiler that were optimized in this project, and each section details the reasons for improvement. All optimizations were performed on the Iconc source from Icon Version 9, and the optimized version of the compiler will be referred to as UTSA Iconc.

2!4

Type Inference

Variables in Icon are implicitly typed and do not require a declaration of a specific type [Walker91a]. All type conversions are implicit in assignments and computations [Walker92a]. In order to avoid type checks at run-time, the Icon compiler keeps track of the type of each variable and infers the types that each variable may hold. The language has all the "normal" types such as integers, floating point numbers, strings, and other common types, but it also has complex structure types such as character sets, lists, tables, and records. The type inferencing model allocates a unique type to each source location at which heterogeneous structure types such as lists or records are created. The Icon compiler represents all the possible types as a bit vector with each bit position representing a specific type. In the course of compiling a large program, the number of total types, and therefore the size of the bit vectors, can skyrocket.

Redundant Function Calls

Icon has a function named Poll which is called every so often to handle certain system events such as processing window system events. The current compiler does an inefficient job of placing these function calls in the generated code. Often there will be two calls to Poll one right after the other or a simple assignment between two calls. The objective of this part is to remove the redundancy and let a reasonable number of calls remain.

Constant Propagation

Simple literals appearing in Icon source code are assigned into the local variable descriptor table within a procedure. This descriptor table is an array of complicated structures and pointers that incurs many memory references simply for a constant. It is doubtful that even the most robust C compilers would be able to recognize these values as constants and propagate them accordingly. The objective is to remove assignments of constants into the descriptor table and replace references to those descriptor locations with constant values.
Variable Initialization

At the beginning of every intermediate procedure there are several loops that initialize local variables and parameters. Sometimes these loops initialize only one or two variables. In certain situations the loop will not be executed at all, but the code for the loop is still generated, requiring a comparison when the program executes. The objective of this part is to simplify the initialization loops and to remove loops that have no effect.

Changes to the Compiler Source

All the changes made to the Icon compiler in order to implement these optimizations were done with C compiler directives so that each optimization can be turned on or off during compilation. All directives are included in src/c/define.h. The following code turns on all optimizations, which are type, redundant function, literal propagation, and loop optimizations respectively.
#ifndef OptimizeType #define OptimizeType #endif #ifndef OptimizePoll #define OptimizePoll

2!5

#endif
#ifndef OptimizeLit
#define OptimizeLit
#endif
#ifndef OptimizeLoop
#define OptimizeLoop
#define LoopThreshold 6
#endif

In the last directive, LoopThreshold is declared to have the value of 6. This constant is used in the loop unrolling optimizations and is present so that the user can control this value. It is simply a limit on the number of entries that may be unrolled in variable initialization loops.

25.2 Optimizing the Type Representation


The first area of optimization is the representation of types. Iconc maintains a structure for each variable that contains information about that variable, including a bit vector with each bit representing a particular type used in the program. When a bit vector is allocated it is one of three possible sizes. The first size is composed of first class types, which are those built-in types plus user defined types that are utilized. The second size consists of the first class types plus intermediate value types. Lastly, there is the number of total types in the database. The database refers to the collection of all builtin operations, their number of parameters and types, and the type for the return value. Data was gathered from Ctree, a circular tree visualization tool. This program consists of ~500 lines of source code. The number of possible first class types is 209, which translates to a 28 byte bit vector. Note that bit vectors are allocated in multiples of a word (4 bytes). During the course of the compilation, 137,946 bit vectors are allocated. The number of first class types plus types for intermediate values is 1,012, resulting in a 128 byte bit vector, and 18,925 vectors of this size are allocated. Lastly, the number of database types is 1,369 types, using a 172 byte bit vector with only 121 allocations of this size. The total memory requirement for the bit vectors is 6.22 megabytes. This information is summarized in Figure 25-1.

Vector Type          Number of Types   Number Allocated   Memory Required (MB)
first class                      209            137,946                   3.8
intermediate class             1,012             18,925                   2.4
database class                 1,369                121                   0.02

Figure 25-1: Bit Vector Sizes

Figure 25-2 is an example of what a bit vector from Ctree might look like. This example shows the division between the three type classes. Within the partition for first class types is bit 0 which represents an integer and bit 6 which is a real. Within the intermediate types partition, bit 209 represents an instance of a cnode record and an instance of a list variable, and bit 232 is an instance of a variable that is of the list type. Every instance of a list or an aggregate type such as a record results in a new type that gets its own bit in the bit vector.

2!

Lastly, within the database class are builtin operations. The functions for random number (O0z7_random) and subtraction (O114_subc) are assigned bits 1,012 and 1,368 respectively. The functions are builtin to the Icon compiler and are assigned their own types in the bit vector.

[Figure 25-2 graphic (xfig/fig2dev export, fig000324) not reproduced in this extraction.]

Figure 25-2: Sample Bit Vector

Additional tests were run on a 25,000 line Icon program called Freedom in the Galaxy, which was a semester long effort by Dr. Jeffery's Software Engineering class. The program has hundreds of variables, but in the process of the execution, Icon requires many intermediate variables, which dramatically increases the number of bit vectors allocated during compilation. Freedom in the Galaxy has 12,591 distinct types including builtin, intermediate, and database types. This is an example of a program that runs out of memory during compilation.

New Type Representation

The first order of business was to develop a new way to represent type information. The first idea was to utilize the pointers to a type vector. All type vectors are pointers to arrays of integers, and the initial plan was to change a type vector's pointer to be not aligned on a 4 byte boundary in the case that the type vector only represents a simple integer. Unfortunately, it was discovered that several different locations referenced the same type vector, and any change to one would not be apparent to the other. The second plan, which was actually adopted, was to create a structure that could contain a packed representation or a pointer to a full length type vector. This allowed multiple variables to reference the same structure, which would always be current since only the fields of a structure were being modified. The following structure is the new type vector.
struct typinfo { unsigned int *bits; unsigned int packed; };

The bits field is a pointer to an array of unsigned integers which hold the full type representation of the variable. The packed field serves two purposes. First, the lower 24 bits of the integer are reserved for the length of this type vector, which corresponds to either the first class, intermediate class, or database class type. This information is required in case a full length vector needs to be allocated. Secondly, the upper 8 bits will contain the packed representation of the type vector. These bits are set by ORing the field with enumerated constants. Figure 25-3 lists the possible values of this field.

2!!

Type      Value   Description
NULL_T    1       Null type
REAL_T    2       Real type
INT_T     4       Integer type
CSET_T    8       C Set type
STR_T     16      String type

Figure 25-3: Valid Packed Types

The typinfo structure and defined constants for builtin types were added to csym.h.

How Type Allocation Works

The new scheme for the type representation is actually rather straightforward. Similar to the old method, a call to alloc_typ is made that returns a new type vector. The old method simply returned a pointer of type unsigned int to a portion of memory of sufficient size to hold the requested number of types, while the new method returns a pointer to struct typinfo. This structure contains a packed representation of the type information which holds the most frequently used types such as integers, reals, strings, csets, and the null value. This requires only an integer, which is four bytes. The size of each bit vector is also encoded in this integer as explained earlier. The structure also has the capacity to hold a pointer to a region of memory that can contain an entire type vector in the event that this type vector needs to represent more than the builtin five types. The entire structure occupies only eight bytes. In reality, alloc_typ does not allocate struct typinfo structures one at a time. Because an enormous number of these structures are allocated during the compilation of a program, alloc_typ allocates a large number of these structures at once. Currently, these structures are allocated in blocks of 400,000. This is done to reduce the overhead that malloc requires when allocating blocks of memory. Every time memory is allocated malloc needs extra memory (usually around 2-4 bytes) for bookkeeping purposes. As you can see, over 800,000 bytes are saved by allocating this large block of structures. Additionally, malloc is generally slow, so this change will improve compile time. The five types that were chosen to be represented were integers, reals, strings, csets, and null. This is because the Icon compiler keeps a global variable for each one of these types that specifies which bit position it is kept in for all bit vectors. Other types such as lists or tables were not suitable because the compiler assigns them a unique type and bit position for each occurrence of the variable. During normal execution, all requests for a type vector return the new type vector with the packed field initialized to zero. It is important to note that the null data type is distinct from having no type at all. Through the course of the compilation, the compiler will either call a function to set bits in the vector or check to see if a particular bit is set that corresponds to some type. When the compiler is checking for the presence of a type, the type structure is checked for either a compact or full representation. Once that is known, a simple mask is created to see if the requested type is present. However, the process becomes somewhat more complicated when the compiler requests that a bit is to be set. First, a check is made to determine whether the type structure contains the compact or full type vector. If the requested type is an integer, real, string, character set or the null value and the type structure uses the compact vector, then the appropriate bit is set in the

2!"

compact vector. On the other hand, if the requested type is not one of the special five types, a full length vector must be allocated, the compact types must be copied into it, and the new type must also be set. The last possible situation is if the full type vector already exists in the type structure, which simply means the requested type can be set without any special actions or additional tests. In order to accomplish this, several functions that manipulate the type vectors had to be changed to accommodate the new representation. The following sections detail the changes and/or reorganization that were made to the Icon compiler.

Reorganizing the Code

After analyzing the functions that manipulate type information, those functions that inspect, modify, or delete type bits were isolated. These functions required modification so that they could handle the packed type representation. In order to facilitate the understanding of these changes, the functions and macros that manipulate type vectors were moved from typinfer.c to a new file called types.c. The following macros were modified or moved to types.c.
NumInts(n_bits) ClrTyp (size, typ) CpyTyp (nsize, src, dest) MrgTyp (nsize, src, dest) ChkMrgTyp(nsize, src, dest)

ClrTyp, CpyTyp, MrgTyp, and ChkMrgTyp were modified to handle the compact vectors while NumInts was moved for the sake of consistency. The functionality of these macros has not changed. The following functions were also modified or moved to types.c.
struct typinfo *alloc_typ(unsigned int n_types); novalue set_typ(struct typinfo *type, unsigned int bit); novalue clr_typ(struct typinfo *type, unsigned int bit); int has_type(struct typinfo *type, int typcd, int clear); int other_type(struct typinfo *type, int typcd); int bitset(struct typinfo *type, int bit); int is_empty(struct typinfo *type); novalue bitrange(int typcd, int *frst_bit, int *last_bit); novalue typecd_bits(int typcd, struct type *type);

All the above functions required modification for the new type representation except for bitrange and typecd_bits.

New Functions

The following functions were added to support the new type representation and were placed in types.c. A description of the purpose of each function is provided after its prototype.
unsigned int *alloc_mem_type(int unsigned int n_ntypes)

Allocates an actual bit vector large enough to hold n_types number of bits. The pointer to the unsigned int array is returned.
novalue xfer_packed_types(struct typinfo *type)

Transfers the types in the packed representation to the full length bit vector in the same struct typinfo variable. It assumes that the bits field of the struct typinfo

2!#

is valid. The transfer is done by finding the appropriate word in the array where a specific bit is supposed to be and creating a mask that is ANDed to that position in the array.
int xfer_packed_to_bits(struct typinfo *src, struct typinfo *dest, int nsize)

Transfers the types in the packed representation from src to a full length bit vector, dest, of type struct typinfo, up to a certain type (bit) in the vector represented by nsize.
novalue and_bits_to_packed(struct typinfo *src, struct typinfo *dest, int nsize)

Performs a bitwise AND on two type vectors. Appropriate measures will be taken for both packed and full type representation.
unsigned int get_bit_vector(struct typinfo *src, int pos)

Builds a slice (selected word) of a full length bit vector from a compact type form.
novalue clr_packed(struct typinfo *src, int nsize)

Zeros out the bits of the packed representation.


novalue cpy_packed_to_packed(struct typinfo *src, struct typinfo *dest, int nsize)

Copies the packed vector from one variable to the packed representation of another variable. That is, the source variable's types are copied into the destination if the type is within the first nsize types.
int mrg_packed_to_packed(struct typinfo *src, struct typinfo *dest, int nsize)

Merges two packed vectors into one. This performs a logical AND of all types within the first nsize types.

Other Changes

Other significant changes had to accompany the switch over to the new type representation. All pointer variables of type unsigned int that referred to a type had to be changed to a pointer to type struct typinfo. This included changes in the following compiler source files: cproto.h, csym.h, ctree.h, and typinfer.c. Note that this also includes function parameters. Additionally, there were functions that had code embedded in them to manipulate the bits of a type vector manually. In these places, the code required reworking either to call the functions that encapsulated bit manipulations or rewriting in order to take advantage of the compact types. These functions are listed below, each followed by a brief explanation of the modifications.
novalue typinfer(void)

Allocates a special variable with all the bits on. This required a call to alloc_mem_type in order to allocate a full length type vector. All the bits were then set to on.
struct store *alloc_stor(int store_sz, int n_types)

Allocates a store which includes type information. This required changing the alloc call to allocate struct typinfo instead of unsigned int.
struct symtyps *symtyps(int n_syms)

2"&

Allocates symbol tables. This also required changing the alloc call to allocate struct typinfo instead of unsigned int.
novalue typ_deref(struct typinfo *src, struct typinfo *dest, int chk)

Before the type dereferencing is performed the src was merged with the dest parameters. This required checking for packed or full type vectors and handling them appropriately. Also, if the boundary between first class and intermediate types falls in the middle of a word, those intermediate types on the boundary word are zeroed out.
novalue abstr_typ(struct il_code *il, struct type *typ)

In one of the cases of a switch statement two type vectors are ANDed together. This required placing a function call to and_bits_to_packed in place of the existing code.
int eval_cnv(int type_cd, int index, int def, int *cnv_flags)

This function determines if a type conversion on a type will succeed. To do this, a type vector is ANDed with several different bit masks. This required checking for packed or full bit vectors and handling them appropriately. In the case of a packed vector, the function get_bit_vector is called to build a word with the appropriate type bits set if they fall in the selected word of the type vector.
struct argtyps *get_argtyp(void)

Allocates an argument list. This required changing the alloc call to allocate struct typinfo instead of unsigned int.

Results of Type Optimization

After the new type representation was implemented, tests were run again on Ctree. The results showed a dramatic decrease in the amount of memory required for compilation. UTSA Iconc required one third the amount of memory of the old compiler. The program Freedom in the Galaxy even compiled under this optimization. Although Freedom in the Galaxy still needed a substantial amount of memory, the important fact is that it compiled. Section 25.4 provides detailed results of the memory usage for both Ctree and Freedom in the Galaxy.

25.3 Optimizing the Generated Code


The other area of optimization is the efficiency of the C code generated by the Icon compiler. The optimizations undertaken were the removal of redundant calls to system functions, constant propagation, and the simplification of variable initialization. These optimizations were obvious from a cursory examination of the C code generated. The goals of these optimizations are to make the intermediate code as small as possible and to speed up the resulting executable. First a brief summary of the internal representation of the C code is provided. This is necessary because most of these optimizations rely heavily on analyzing the internal C code. Following this, the individual optimizations are discussed in detail.

Intermediate Code Representation

This section briefly describes how the intermediate C code is represented and generated internally by the Icon compiler. The majority of the functions that generate this internal representation and print it to a file are contained in the following compiler source files: ccode.c, codegen.c, and inline.c.

2"1

How Code is Generated

Once the source code is parsed and evaluated, the intermediate C code needs to be generated and output to a file for compilation by the native C compiler. First the compiler builds a syntax tree plus symbol tables and other necessary structures. Then, the header file is created. This includes standard definitions necessary for all Icon programs and structures and variables specific to the program being compiled. Next, the proccode function is called for each function in the tree. This outputs the function definition and variable initialization code, and then steps through the syntax tree and creates C code to represent the body of the function. After all the code for the body of the current procedure is generated internally, the code is then written to the file. The internal C code is represented through a C structure called struct code which is shown below.
struct code { int cd_id; struct code *next; struct code *prev; union cd_fld fld[1]; };

The cd_id field is an identifier signaling what type of code is held in this structure. This field may be set to one of the following enumerated values. Each value corresponds to a type of code that can be written to the intermediate C code. The table in Figure 25-4 contains the enumerated name along with its integer value and a short description.

Code Type     Value   Description
C_Null        0       No code in this struct
C_CallSig     1       Call a signal (function)
C_RetSig      2       Return a signal
C_NamedVar    3       Reference a variable
C_Goto        4       Goto statement
C_Label       5       Label statement
C_Lit         6       Literal value
C_Resume      7       Resume signal
C_Continue    8       Continue signal
C_FallThru    9       Fall through signal
C_PFail       10      Procedure failure
C_PRet        11      Procedure return
C_PSusp       12      Procedure suspend
C_Break       13      Break out of signal handling switch
C_LBrack      14      Start of a new C block

2"2

Code Type     Value   Description
C_RBrack      15      End of a C block
C_Create      16      Call create() for a create expression
C_If          17      If statement
C_SrcLoc      18      Source file name
C_CdAry       19      Array of code pieces

Figure 25-4: Code Types

The fld field is important and is directly linked to what type of code the struct code is defined as. For example, if a struct code is defined as C_If then fld[0] is a pointer to another struct code that corresponds to the if portion of the statement, and fld[1] is another pointer to a struct code representing the then portion of the statement. In fact there are two macros for extracting each pointer. These macros plus macros for all the other code types are found in the ccode.h header file. However, there is one special case that requires some explanation. If the cd_id is C_CdAry then the fld element is an unspecified length of cd_fld unions. In this case all even indices into the array are tags describing the contents of the following array element. There is a special marker, A_End, that signifies the end of the array. Figure 25-6 shows these field identifiers along with their corresponding fields for an assignment statement. It is important to note that only when the cd_id is C_CdAry will the field identifiers be present. Figure 25-5 gives the possible values for these tags.

Element Type   Value   Description
A_Str          0       Pointer to a string
A_ValLoc       1       Pointer to a struct val_loc
A_Intgr        2       Integer value
A_ProcCont     3       Procedure continuation
A_SBuf         4       String buffer
A_CBuf         5       Cset buffer
A_Ary          6       Pointer to a subarray of struct code structures
A_End          7       Marker for end of array

Figure 25-5: Element Types

For the most part the C_CdAry is used for miscellaneous code that is not covered by the other 19 code types. Most simple assignments fall into this category. The remaining two fields of a struct code, next and prev, are links to the next and previous struct code structures in the chain.

2"3

Title:/tmp/xfig-fig000204 Creator:fig2dev CreationDate: !n Apr " 12:2#:06 1996

Figure 25-6: Literal

Redundant Function Calls

An example of this type of optimization involves several function calls needed to handle certain run-time system activities in Icon that are included in the generated C code. For example, throughout the code Icon places a call to the function Poll which checks for pending events such as window redraws. In some cases there is a call to Poll followed by an assignment and another call to Poll, which is far too frequent. The placement of these function calls can be analyzed to determine when they are necessary.

Analyzing Function Call Placement

The solution to this problem entails analyzing where the calls to Poll were being placed. The Poll function is inserted into the generated code by the function setloc which is located in the file ccode.c of the compiler source. The old method for determining when to insert a call to this function is somewhat confusing. Also, setloc does more than insert these function calls, so there was no change in the way it determined when to put a call in. Instead, a call to analyze_poll is made that determines if it is safe to remove the previous occurrence of the Poll function. To accomplish this, a global variable is kept, called lastpoll, which is a pointer to struct code, and it is always assigned to the location of the last Poll function. Of course, initially lastpoll is NULL. The global variable is declared in ccode.c. The prototypes for the two new functions are as follows:
int analyze_poll(void)

This function analyzes the code between the last occurrence of the Poll function and the current position. If there are no function calls (C_CallSig), return signals (C_RetSig), C code blocks (C_LBrack or C_RBrack), return calls (C_PRet), procedure suspends (C_PSusp), or breaks (C_Break), then the previous instance of Poll will be removed; otherwise, it will be left in place. The above code types are restricted because they all involve calling other functions. If it were known that these functions were short and did not call other

2"4

functions, then the call to Poll could be removed without worry; however, this kind of detailed analysis is not performed and is inhibited by the fact that some of these functions represented by C_CallSig may be library functions and these are linked at C compile time. Also, regardless of whether the previous instance of a call to Poll is removed, the new call to Poll is added to the code list and the lastpoll variable is updated.
novalue remove_poll(void)

This function actually removes the call to Poll by setting the cd_id field in the struct code structure to C_Null. It is important to note that the struct code that represents the call to Poll is not physically deallocated from the list. Its cd_id field is simply set to C_Null because removing it introduces side effects, which are either errors during C compilation or the misplacement of goto labels, which affects the flow of execution and produces unpredictable results. This occurs because a struct code of type C_Goto may reference the removed node.

Icon Literals and Constant Propagation

Constant propagation was the second most difficult optimization to implement, next to the new type representation, because the Icon compiler generates a complex data structure that contains Icon values, including literals. These Icon literals are assigned into this tended descriptor table even though these values are constants. There are several reasons to improve the representation of these constants. First, by changing these complicated Icon literals to simple C literals, the resulting executable code will be smaller. Secondly, there is the issue of constant propagation. In many cases, an index into the descriptor table is passed to a function or assigned to a variable. The question that arises is whether the C compiler can detect that the descriptor table value being passed is a constant that can be propagated to all places where the descriptor table is used. For example, the following code fragment is fairly common:
r_frame.tend.d[4].dword = D_Integer; r_frame.tend.d[4].vword.integr = 1; irslt = sub(argp[0].vword.integr, r_frame.tend.d[4].vword.integr);

In this section of code, the structure r_frame.tend.d[4].vword.integr is assigned a value and then immediately used. This code can be simplified to:
irslt = sub(argp[0].vword.integr, 1);

Note that the assignment of the literal into the descriptor table may no longer be necessary; the time savings on this initialization may be as great as the savings for the simplified reference.

Tended Descriptor Tables

Most functions contain a tended descriptor table. This is an array of descriptor structures which contain either an integer, pointer to a string, pointer to a block, or a pointer to another descriptor location. A named variable is assigned a specific index into the descriptor table while temporary variables are assigned an index, but other temporary variables can be assigned into the same cell many times over. Named variables are all those that are explicitly used in the Icon source code such as loop control variables, and temporary variables are constant values (regardless of type). For example, in the first Icon code example the value 2.4 is assigned its own location into the descriptor table. The

2"5

same thing holds true for the second example. The string "foo" is assigned its own location. Because both these values are only literals in the Icon code, they are given temporary locations in the tended descriptor table that may be used over again.
if (x_val = 2.4) then do_something(x_val) ... ... if (str_val == "foo") then do_something(str_val)

For example, if the constant 2.4 is not used after the second code fragment then "foo" may be assigned into the location previously occupied by 2.4.

Analyzing Literal Assignments

Several new functions were introduced in order to analyze all constants and their use. Inside the function proccode, before the internal C code is written to a file, calls to analyze_literals and propagate_literals are made, which do the propagation. The analyze_literals function builds a table which contains information such as the scope of a descriptor entry, whether it is safe to propagate a literal, and the literal value. The table structure is given below.
struct lit_tbl {
   int modified;
   int index;
   int safe;
   struct code *initial;
   struct code *end;
   struct val_loc *vloc;
   struct centry *csym;
   struct lit_tbl *prev;
   struct lit_tbl *next;
};

The field modified is a flag which can be set to one of the enumerated types in Figure 25-7.

Name           | Value | Description
NO_LIMIT       | 0     | Descriptor never changes
LIMITED        | 1     | Descriptor value does change, propagate any type
LIMITED_TO_INT | 2     | Descriptor value does change, propagate only if integer
NO_TOUCH       | 3     | Descriptor value should not be propagated

Figure 25-7: Modify Flags

The NO_LIMIT value refers to those descriptor locations that always contain the same constant. That is, no other value shares the same descriptor location, and it may be propagated freely without conflicts. The LIMITED value refers to those descriptor locations that are either reused at some point or are modified in some way. The value LIMITED_TO_INT is similar except that special care must be taken when propagating this constant. For example, a constant such as a string should not be propagated everywhere an integer may be propagated.

2"

Lastly, the value NO_TOUCH refers to descriptor locations that should not be propagated. These descriptor locations often contain loop control variables which are marked as temporary but should under no circumstances be replaced with their initial values. For example, the first code fragment shows unoptimized code, and the second fragment is the same code but with constants propagated. Descriptor location 6 should not be touched because it serves as a loop control variable, while the use of location 7 may be replaced with its constant value 10 even though the same location is assigned a new value later on after label L9.
r_frame.tend.d[6].dword = D_Integer; r_frame.tend.d[6].vword.integr = 1; r_frame.tend.d[7].dword = D_Integer; r_frame.tend.d[7].vword.integr = 10; L8: if (!(r_frame.tend.d[6].vword.integr <= r_frame.tend.d[7].vword.integr) ) goto L9; ... ++r_frame.tend.d[6].vword.integr; goto L8; L9: r_frame.tend.d[7].dword = D_Integer; r_frame.tend.d[7].vword.integr = 7; ____________________________________________ r_frame.tend.d[6].dword = D_Integer; r_frame.tend.d[6].vword.integr = 1; L8: if (!(r_frame.tend.d[6].vword.integr$<=$ 10) ) goto L9; ... ++r_frame.tend.d[6].vword.integr; goto L8; L9: r_frame.tend.d[7].dword = D_Integer; r_frame.tend.d[7].vword.integr = 7;

The field index is the index into the descriptor table for each constant. The field safe refers to whether or not it is safe to modify the end field. The end field marks the point in the intermediate code beyond which it is no longer safe to propagate this value. The end field is sometimes modified when inserting a new entry into the literal table. This is described in detail under the tbl_add function presented shortly. The fields initial and end together delimit the scope within which it is safe to propagate the current literal. If end is NULL then it is safe to propagate to the end of the function. The fields vloc and csym are pointers to either a struct val_loc or a struct centry which contain the constant value of the current descriptor. The struct centry member points to the corresponding location in the global symbol table of constant values maintained by the Icon compiler. The fields prev and next are necessary to make the table doubly linked. Also, it should be noted that the number of entries in the literal table is fairly small. During compilation of Ctree, the largest literal table used contained 15 entries.

2"!

The analysis phase consists of stepping through the struct code chain for each function looking for each instance of a literal. Figure 25-8 shows how a literal is contained within a struct code structure. At this point, a new entry into the literal table is created that keeps track of where in the code the literal is assigned into the descriptor table and a pointer to the struct centry structure where the literal value is kept. This phase also attempts to find the point at which descriptor entries are assigned new values. Thus a scope is defined within which the constant may be propagated.
Title:/tmp/xfig-fig000$41 Creator:fig2dev CreationDate: at Apr 6 21:00:32 1996

Figure 25-8: Literal

Once the analysis is complete and the literal table is built then the function propagate_literals is called, which goes through each entry in the literal table and examines the code beginning at the initial field until the struct code referenced by the end field is encountered. If a struct code is found that references the descriptor containing the current literal then that reference is replaced by the literal itself. Figure 25-6 illustrates a fragment of code that does an assignment, and Figure 25-9 shows the same fragment with the second descriptor replaced with its literal (assuming that descriptor location 8 was previously initialized to 27). It is important to note that only the struct val_loc on the right side of the equal sign will be replaced by its literal.

2""

Title:/tmp/xfig-fig00020$ Creator:fig2dev CreationDate: !n Apr " 12:2":42 1996

Figure 25-9: Assignment

New Functions

The following functions were created to support the constant propagation optimization. All these functions are placed in the compiler source file ccode.c. Each function used in the constant propagation is prototyped and described below.
struct lit_tbl *alc_tbl(void)

This function allocates a struct lit_tbl entry that contains information about a literal and its usage. It first checks a global pointer called free_lit_tbl to see if there are any free table structures that may be reused. If there are no free structures in this list then a new structure is allocated. Lastly, the fields are initialized to predefined values.
novalue free_tbl(void)

This function frees the memory used for the current table by attaching the current table to the list of free table structures (free_lit_tbl).
novalue tbl_add(struct lit_tbl *add)

This function adds a new struct lit_tbl structure into the current table. The insertion is at the end of the table; in addition, it checks for a previous use of the descriptor location used in the element being added. For the previous use of the same location, that entry's end pointer is set to the initial pointer of the element being added. In essence, this defines a scope for each descriptor location. Once end is set for the first time, it should not be changed later.
int substr(const char *str, const char *sub)

This function is used to scan strings for logical operators (==, !=, >=, <=, etc.). If the string represented by sub is found in str then TRUE is returned. It is necessary to identify these operators so that a string is not propagated as an operand to one of these operators, which would not be valid C syntax.
int instr(const char *str, int chr)

This function is used to determine if a string contains an assignment operator. This function will return TRUE if the string str contains any type of assignment (=, +=, -=, *=, /=, or %=).

2"#

novalue invalidate(struct val_loc *v, struct code *end, int code)

This function sets values for an element in the literal table. For all literal table entries that point to the struct val_loc represented by v, the end field is set to end and the modified field is set to code. code can be one of the following enumerated values: NO_LIMIT, LIMITED, LIMITED_TO_INT, or NO_TOUCH.
novalue analyze_literals(struct code *start, struct code *top, int lvl)

This function steps through the struct code list for each function, building up a literal table, and analyzing the scope within which each literal can be safely propagated. It checks for loop control variables, when and if the value of a constant descriptor location changes, and checks to see if a descriptor location is passed by reference to any functions.
novalue propagate_literals(void)

This function steps through each entry in the literal table and replaces occurrences of the descriptor location with the literal between the struct code structures from the initial field to the end field. The function eval_code is called to do the actual propagation.
int eval_code(struct code *cd, struct lit_tbl *cur)

This function first checks to see if the descriptor index of the code currently being examined matches that of the current literal table entry. If the current descriptor is accessed as an integer or a string then the descriptor is replaced with the literal value. Also, the modified field is checked to see if there are any restrictions on replacement. The table in Figure 25-10 lists the restrictions for each possible value of modified.

Name           | Replacement Restrictions
NO_LIMIT       | Always replace
LIMITED        | Always replace within initial and end
LIMITED_TO_INT | Only replace if used as int, also limited by scope
NO_TOUCH       | Never replace

Figure 25-10: Replacement Policy

The actual replacement of a descriptor reference by a literal is accomplished by setting the current index into the fld array to the A_Str type and allocating a string into which the literal is copied. Figure 25-6 and Figure 25-9 illustrate an occurrence of this.

Variable Initialization

Another issue is the initialization of the descriptor tables in each C function that is generated by the Icon compiler. Many of the generated functions contain a loop that initializes all the entries of the local descriptor table to the null descriptor. This is rather cumbersome and generates a great deal of overhead.

Eliminating Dead Code

The first optimization to the variable initialization was to eliminate "dead" code, which is code that is never executed. In some cases the loops that initialize the descriptor tables resembled this:

2#&

for (i = 0; i < 0; ++i) r_frame.tend.d[i] = nulldesc;

This code is generated for Icon library functions in the function outerfnc located in codegen.c. There is a separate function that outputs similar code for user written functions which does check to see if the loop will ever execute. Both functions contain a variable ntend which holds the number of descriptor entries. A simple check for equality with zero was added.

Loop Unrolling

Every user function initializes all tended descriptor entries to the value of the null descriptor nulldesc at the beginning of the function. It is a simple one-line for loop similar to the following code fragment.
for (i = 0; i < 3; ++i) r_frame.tend.d[i] = nulldesc;

Also, upon examining the C code generated from several programs, the number of descriptor entries per procedure rarely exceeds ten. Because this is a relatively small number, these loops can be unrolled into a series of assignments, and the loop may be removed. The following code is the above loop unrolled.
r_frame.tend.d[0] = nulldesc; r_frame.tend.d[1] = nulldesc; r_frame.tend.d[2] = nulldesc;

While this will increase the size of the generated code, the loop overhead is eliminated. There is a limit placed on the number of loop iterations that will be unrolled which is defined in define.h. Currently, this value, LoopThreshold, is set to 6. Because this number and the number of descriptor table entries are small, the number of unrolled elements is reasonable, and the code size is not greatly affected. The code that unrolls these loops is in the function outerfnc in the file codegen.c. Because this change is only several lines, the code that implements loop unrolling is included below.
#ifdef OptimizeLoop if (ntend > 0) /* Check for dead code */ for (i=0; i < ntend ;i++) fprintf(codefile, " r_frame.tend.d[%d] = nulldesc; \n", i); #else fprintf(codefile, "for (i=0; i < %d ;i++) \n", ntend); fprintf(codefile, " f_frame.tend.d[i] = nulldesc;\n;"); #endif

Results of Code Generation Optimizations

Several tests were run to determine whether the code generation optimizations were effective. These optimizations were performed in hopes of improving the execution speed of the compiled program, reducing the size of the intermediate code and the resulting executable, and the compilation time. A brief description of the results follows; however, a more detailed analysis of the optimizations is given in Chapter 4. Overall, the optimizations improved the execution speed by a modest amount. The improvement is roughly between 6% and 8.25%. While this is not as great as was hoped, it still is an improvement. The code size of both the intermediate code and the generated

2#1

executable are surprisingly smaller. The loop unrolling seemed to be offset by the constant propagation, which eliminated unnecessary assignments and references. The size of the executables was reduced by approximately 4-8% for large programs, but there was no change in executable size for small programs (20 lines). The size of the generated C file was consistently around 3% smaller than before the optimizations. Also, on average around half of all calls to Poll were removed, and in one case, two thirds were eliminated. The largest improvement was to compilation time. The optimizations improved compile time by 24-31% on large programs and 13% on small programs; however, it should be noted that all of the tests for this section were performed with all the optimizations on, including the type representation optimization.

25.4 Results
This chapter presents detailed information on the results of each optimization discussed in this paper. The first section discusses the improvements in memory usage resulting from the type representation optimizations while the second section presents the results from removing redundant function calls, unrolling loops, removing dead code, and propagating literals.

Type Representation

Tests were performed on Ctree and Freedom in the Galaxy to determine the new memory requirements of UTSA Iconc. These tests were run with only the type representation optimizations and no other optimizations that were covered in Chapter 3. The results show a substantial decrease in the memory required to compile the program. For Ctree, there were 156,992 packed type structures allocated, which is the total number of all type vectors from the first test. Once the packed structure was allocated only 11,653 needed an actual first class vector allocated. Of the intermediate class, only 5,172 full-size vectors needed to be allocated. However, all 121 of the database class variables needed the full sized vector. Overall, the total memory usage for type representation is 2.17 megabytes, which is 35% of the memory required by the old type representation. The results are summarized in Figure 25-11.

Vector Type        | Number of Types | Number Allocated | Required Memory (MB)
packed class       | 5               | 156992           | 1.25
first class        | 209             | 11653            | 0.3
intermediate class | 1012            | 5172             | 0.6
database class     | 1369            | 121              | 0.02

Figure 25-11: Memory Usage (Ctree)

Unfortunately, the improvement in memory usage was not great enough for Freedom in the Galaxy to compile on the same machine on which the tests on Ctree were run; however, the program did compile on a Sparc 10 with 128MB of memory with no one else logged on at the time. Figure 25-12 contains the memory requirements for each of the classes of vectors.

Vector Type        | Number of Types | Number Allocated | Required Memory (MB)
packed class       | 5               | 4294822          | 34.36
first class        | 1425            | 119468           | 21.5

2#2

Vector Type        | Number of Types | Number Allocated | Required Memory (MB)
intermediate class | 8508            | 24349            | 25.91
database class     | 12591           | 7                | 0.01

Figure 25-12: Memory Usage (Freedom in the Galaxy)

Even with the optimization, Freedom in the Galaxy requires over 81 megabytes of memory for the type inferencing alone. Because Freedom in the Galaxy could not be compiled before the type optimization, there are no numbers to compare these with. However, considering that the type optimization reduced the memory requirements for Ctree to roughly one third of their former size, a good estimate for the original memory requirements would be around 240 megabytes! While the new type representation drastically reduces the amount of memory used during compilation, it still uses too much memory to be of use when compiling large programs on anything but an expensive workstation with a substantial amount of memory. However, UTSA Iconc still offers the user the advantage of compiled code, and the new type representation makes UTSA Iconc practical on many programs that could not be compiled because of memory requirements of the old Icon compiler.

Code Generation

This section details the results of the code generation optimizations in the area of execution speed, code size, and compilation time. These tests were run on several programs. The first program, Beards, generates production grammars, non-terminals, terminals, and epsilon sets from an input grammar. The second program, Yhcheng, is a line editor similar to ed which also has revision control capabilities. For the code size and compilation time tests, two other programs, Ctree and Sphere, were used for tests. Beards, Yhcheng, and Ctree are all large programs while Sphere is included because it is a very small program (less than 25 lines). All timings performed used the Unix or Linux time utility. Also note that these timings were performed with all optimizations turned on including the type representation optimization.

Execution Speed

Each program was run 10 times with sample input and averages were computed. Figure 25-13 summarizes the execution times for Beards and Yhcheng.

Program | Version     | User  | System | Elapsed
Beards  | Optimized   | 0.5   | 0.12   | 00:01.17
Beards  | Unoptimized | 0.52  | 0.13   | 00:01.27
Beards  | Improvement | 4.97% | 9.09%  | 8.25%
Yhcheng | Optimized   | 0.59  | 1.11   | 00:01.99
Yhcheng | Unoptimized | 0.62  | 1.27   | 00:02.14
Yhcheng | Improvement | 4.21% | 12.91% | 6.79%

$ig re 2A-13@ C.ec tion Times

2#3

Code Size

Tests were run on the same two programs to determine if there was an improvement in either the intermediate code size or the size of the resulting executable. Figure 25-14 displays the code sizes for Beards, Yhcheng, Ctree, and Sphere. The first three programs are large (500-1800 lines) while Sphere is small (20 lines).

Program | Version     | C File | H File | Executable
Beards  | Optimized   | 246159 | 12967  | 204800
Beards  | Unoptimized | 252041 | 12967  | 212992
Beards  | Improvement | 2.33%  | 0.00%  | 3.85%
Yhcheng | Optimized   | 554014 | 46168  | 294912
Yhcheng | Unoptimized | 568118 | 46168  | 319488
Yhcheng | Improvement | 2.48%  | 0.00%  | 7.70%
Ctree   | Optimized   | 290536 | 61545  | 225280
Ctree   | Unoptimized | 298813 | 61545  | 237568
Ctree   | Improvement | 2.77%  | 0.00%  | 5.17%
Sphere  | Optimized   | 82289  | 49755  | 159744
Sphere  | Unoptimized | 84972  | 49755  | 159744
Sphere  | Improvement | 3.16%  | 0.00%  | 0.00%

Figure 25-14: Code Sizes

Much of the reduction in code size can be attributed to the removal of redundant calls to Poll, and it is this reduction that offsets the loop unrolling. Improvements on Beards, Yhcheng, and Sphere show that almost one half of all calls to Poll were eliminated; however, Ctree shows almost a two thirds reduction. Figure 25-15 shows the number of calls to Poll for each program before and after the optimization.

Test Program | No. Before | No. After
Beards       | 810        | 461
Yhcheng      | 2144       | 1135
Ctree        | 745        | 293
Sphere       | 40         | 22

Figure 25-15: Number of Redundant Functions Removed

Compilation Time

Lastly, the compilation times for the sample programs are given. Each program was compiled five times with the results averaged. Again, results for Beards, Yhcheng, Ctree, and Sphere are in Figure 25-16.

2#4

Program | Version     | User   | System | Elapsed
Beards  | Optimized   | 43.57  | 1.77   | 00:47.40
Beards  | Unoptimized | 60.93  | 1.65   | 01:02.93
Beards  | Improvement | 28.49% | -7.27% | 24.68%
Yhcheng | Optimized   | 116.97 | 2.76   | 02:04.14
Yhcheng | Unoptimized | 163.37 | 2.86   | 02:49.71
Yhcheng | Improvement | 28.40% | 3.50%  | 26.85%
Ctree   | Optimized   | 65.26  | 2.54   | 01:13.44
Ctree   | Unoptimized | 92.25  | 2.88   | 01:47.44
Ctree   | Improvement | 29.26% | 11.81% | 31.65%
Sphere  | Optimized   | 11.98  | 1.83   | 00:16.36
Sphere  | Unoptimized | 13.62  | 2.22   | 00:18.85
Sphere  | Improvement | 12.04% | 17.57% | 13.21%

Figure 25-16: Compile Times

Analysis of Intermediate Code Optimizations

The gains in execution speed and code size were modest but not startling. For the most part, improvement was less than 10%. However, the results for compilation time are more promising. The speedup was between 24% and 31% for large programs, which is between a 15 and 45 second improvement. The eliminated function calls most likely have a negligible effect on execution speed but greatly contributed to the reduction in code size. For example, on a large program like Yhcheng which contained more than 18,600 lines of C code, approximately 450 redundant calls were removed. It was not expected that eliminating "dead" initialization loops would have much effect on execution speed. Constant propagation and loop unrolling probably accounted for the improved execution times. However, more of an improvement was expected from the constant propagation optimization. Two possible explanations could be that the native C compiler is able to reduce the complex structure lookup to its literal value or that the compiler has so much other baggage slowing down execution that the constant propagation improvement was not enough to make a great difference. The second explanation seems more likely. The sizes of the intermediate code and executable code were also modestly improved. The elimination of redundant function calls offset the addition of code due to loop unrolling. Also, eliminating unnecessary initializations for literals that were propagated contributed to the smaller code sizes. It is important to note that as it is, the compiler generates an enormous amount of code for procedure continuations and suspensions, so that 25-30% of the intermediate code consists of these functions and the rest is user code.

2#5

Lastly, the speed of compilation was a pleasant surprise; however, I do believe that this improvement is due to the type inferencing optimization because the current optimizations being discussed only add extra logic to improve the generated code. Another significant factor is that less memory is being used by the type inferencing system, which therefore causes less access to virtual memory. I should note that all the tests were run with that optimization on, and the improvement to type inferencing simplifies the type system in many ways. To determine if a specific bit is set, the old system had to create a mask and find the appropriate index into a long bit vector. The new system requires a single comparison in the case of the five builtin types.

Conclusion
All of the optimizations discussed in this chapter have been implemented. Some of the optimizations performed extremely well while others did not have much effect. The type representation change provided a substantial improvement on the memory usage required by the type inferencing system. As was stated earlier, the compiler still uses too much memory to be of much use to the average Icon programmer but is much better suited to offering the added speedup of compiled code when occasionally necessary. The intermediate code optimizations were really just the tip of the iceberg of all the possible improvements to this area. The removal of redundant calls to system calls was a small improvement. Literal propagation was probably the most significant improvement along with loop unrolling. Further optimizations in this area are likely to yield the best improvements to performance.

Future Optimizations

After studying the generated code, several other optimizations were identified that may offer additional improvements to both the speed of execution and the size of the intermediate and executable code. The next few paragraphs describe additional optimizations and are organized in order from the easiest to the hardest to implement.

1. For the unrolled descriptor initializations, change the array indexing to pointer arithmetic, which is faster. For example, the following code fragment is modified as follows:
r_frame.tend.d[0] = nulldesc; r_frame.tend.d[1] = nulldesc; _____________________________ register dptr p; p = r_frame.tend.d; (p++)->dword = nulldesc; (p++)->dword = nulldesc;

2. Analyze the logic of loops and also unroll smaller ones. For example, the following loop appears at the beginning of most functions.
for (i = 0; i < r_nargs ; ++i) deref(&r_args[i], &r_frame.tend.d[i + 0]); for(i = r_nargs; i < 2 ; ++i) r_frame.tend.d[i + 0] = nulldesc;

In this case r_nargs cannot be greater than two because it was earlier declared to have only two entries. It would be necessary to guarantee that r_nargs can never be more

2#

than two, but if it is certain that there are exactly two elements then we can write the initialization loop as follows:
if(r_nargs > 0) { deref(&r_args[0], &r_frame.tend.d[0]); if (r_nargs > 1) deref(&r_args[1], &r_frame.tend.d[1]); else tend.d[1].dword = D_Null; } else tend.d[0].dword = D_Null;

This optimization could lead to a gain in execution speed. For example, if the unrolling is performed on descriptors with array sizes of one or two, approximately 40% of these loops would be unrolled.

3. An easy and obvious solution would be to simplify expressions like i + 0 which commonly occur. This will not improve execution time because the C compiler will catch this, but by removing it before writing the statement to the intermediate file, the compile time of the C compiler will be improved.

4. Another easy optimization would be to shorten variable names. There is a penalty for having to write long names such as r_frame.tend.d to file and then having the C compiler read them back in. This could be changed to r_f.t.d. While this makes the intermediate C code hard to read, the intermediate code is not meant to be inspected by the user, and the change will result in faster compilations.

5. For the initialization loops present in all functions, remove the initialization of the loop control variable when unnecessary. Consider the following loop:
for (i = 0; i < r_nargs ; ++i) deref(&r_args[i], &r_frame.tend.d[i + 0]); for(i = r_nargs; i < 2 ; ++i) r_frame.tend.d[i + 0] = nulldesc;

The variable i in the second loop does not need to be initialized since it is already at the value that it is supposed to be for the second loop. The next fragment of code illustrates this change.
for (i = 0; i < r_nargs ; ++i) deref(&r_args[i], &r_frame.tend.d[i + 0]); for( ; i < 2 ; ++i) r_frame.tend.d[i + 0] = nulldesc;

While this change is very easy, it is questionable whether this will provide noticeable improvement in execution except in large programs where these loops are very common.

6. Assignments of the r_frame.tend.d structures may be simplified. Consider the following assignment:
r_frame.tend.d[2] /* i */.vword.integr = r_frame.tend.d[4].vword.integr; r_frame.tend.d[2] /* i */.dword = D_Integer;

This could be changed into a single assignment as follows:


r_frame.tend.d[2] = r_frame.tend.d[4];

This optimization would require more work than the previously described ones. Each struct val_loc structure would have to be examined, including the context in which

2#!

it is used in order to catch assignments such as this; however, these assignments are very common and could lead to substantial gains in execution speed.

7. Similarly, perform the same simplified descriptor assignment on global descriptor locations. A method needs to be created for changing global assignments such as:
globals[63] /* rnode */.dword = D_Integer; globals[63] /* rnode */.vword.integr = 1;

into
globals[63] /* rnode */ = onedesc;

where onedesc is a single descriptor that already contains the values of the dword and vword being assigned. This could be performed by creating several constant descriptors for the common values such as 0 or 1. This change is similar to the previous optimization, but it will offer a smaller improvement to execution speed because global descriptor assignments occur much less frequently.

8. When a variable is dereferenced, it is often the case that the variable location is passed in for both parameters to the deref function. For example, in the following code example, r_frame.tend.d[7] is both the variable being dereferenced and the location where the dereferenced value is to be placed. This can be simplified by creating another version of deref, perhaps named deref1, that takes a single argument, dereferences it, and places the dereferenced value into the parameter location.
deref(&r_frame.tend.d[7], &r_frame.tend.d[7]);

9. Another issue is redundant constant initializations. Consider the following code:
r_frame.tend.d[8].dword = D_Integer; r_frame.tend.d[8].vword.integr = 1; if (!(argp[1] /* level */.vword.integr== 1) ) goto L19 /* bound */; r_frame.tend.d[8].dword = D_Integer; r_frame.tend.d[8].vword.integr = 1;

The descriptor location 8 is assigned the value of 1 and then a conditional statement is performed which is followed by a possible goto. If the jump does not occur then the same descriptor location is assigned the same value over again. Clearly the second assignment is wasteful and needs to be eliminated. This would require fairly aggressive analysis of the intermediate code in order to catch these code sequences, but does offer the benefits of increased execution speed and smaller code size.

A more difficult optimization that offers a substantial reduction in the size of the intermediate and executable code deals with the initialization functions that set up frames. In the case of Ctree, over 30% of the generated C code consists of these functions. For example, in Ctree there are two functions named P06i_set_value_Vtext and P03n_unset_Vbool_coupler which are identical except for their frame structures, similarly defined as PF06i_set_value_Vtext and PF03n_unset_Vbool_coupler; however, these structures are identical. A possible solution would be to write out one copy of each unique frame structure along with its corresponding function that would initialize that frame. In addition to the reduction of code size this would result in faster compilations and faster loading of the resulting executable. This last optimization is the most difficult and would require extensive changes; however, this optimization offers the best improvements in code size, execution time, and compile time.

2#"

2##

3&&

Part III: The Implementation of Unicon

3&1

3&2

Chapter 26: The Unicon Translator


Unicon is implemented as a minimal addition to Icon. Most of its features are implemented by extending existing functions with new semantics, and by the addition of new functions and keywords with no new syntax. Originally, the object-oriented facilities were implemented as a preprocessor named Idol (Icon-derived object language). After several years of experience and evolution, Idol became a part of Unicon. Concurrently with this name change, thanks to Ray Pereda developing a version of Berkeley YACC for Icon and Unicon, the Unicon translator was substantially modified from a line-oriented preprocessor to a full parser that generates code by traversing the syntax tree. At this point it is still reasonable to call the Unicon translator a preprocessor, but it has many of the traits of a compiler.

26.1 Overview
The Unicon translator lives in uni/unicon/. In addition to many Unicon source files, it uses the external tools iyacc and merr to generate its parser and syntax error message tables, which depend on files unigram.y and meta.err, respectively. Unicon is written in Unicon, creating a bootstrapping problem. When building from sources, some of the .icn files can be translated by icont (the Icon translator, a C program). Those files that require Unicon itself in order to compile are included in precompiled object format (in .u files) in order to solve the bootstrapping problem.

26.2 Lexical Analysis


Unicon's lexical analyzer is written by hand, in Unicon, using a lex-compatible interface. Some of its design is borrowed from the Icon lexical analyzer (which is handwritten C code). It would be interesting to replace Unicon's lexical analyzer with a machine generated lexical analyzer to reduce the amount of compiler source code we have to maintain. The lexical analyzer consists of a function yylex() located in unicon/uni/unicon/unilex.icn, about 500 lines of code.

Globals Comprising the Lex-compatible Public API

The global declarations that exist in order to provide a Lex-compatible API include:
$include "ytab_h.icn"                    # yacc's token categories
global yytext                            # lexeme
global yyin                              # source file we are reading
global yytoken                           # token (a record)
global yylineno, yycolno, yyfilename     # source location

Character Categories The lexical analyzer uses several csets for different character categories beyond the built-in ones:
global O, D, L, H, R, FS, IS, W, idchars

# init_csets() - initialize the character-category csets used by the
# lexical analyzer.  Takes no parameters; its only effect is to assign
# the globals declared above.
procedure init_csets()
   O  := '01234567'                  # octal digits
   D  := &digits                     # decimal digits
   L  := &letters ++ '_'             # letters plus underscore
   H  := &digits ++ 'abcdefABCDEF'   # hexadecimal digits
   R  := &digits ++ &letters         # digits plus letters
   FS := 'fFlL'                      # NOTE(review): looks like C-style float suffixes - confirm
   IS := 'uUlL'                      # NOTE(review): looks like C-style integer suffixes - confirm
   W  := ' \t\v'                     # blank, tab, vertical tab
   idchars := L ++ D                 # characters do_letters() accepts inside a name
end

The Token Type The record type storing each token's information just bundles together the syntactic category (an integer), lexeme (a string), and location at which the token occurred. This is pretty minimalist.
record token(tok, s, line, column, filename)

Global Variables for Error Handling and Debugging Several remaining global variables are mainly used for error handling, and for debugging the lexical analyzer itself. Reserved Words Global reswords() creates and becomes a table holding the Unicon reserved words. For each word, a pair of integers [tokenflags, category] is kept. Language design note: tables in this language need a "literal" format.
# reswords() - return the reserved-word table, building it on the first call.
# Every value is a two-element list [tokenflags, category].  A lookup of a
# word that is not reserved yields the table default, [Beginner+Ender, IDENT].
procedure reswords()
   static t
   local entry
   initial {
      t := table([Beginner+Ender, IDENT])
      # one row per reserved word: [word, tokenflags, category]
      every entry := ![
            ["abstract",  0,              ABSTRACT],
            ["break",     Beginner+Ender, BREAK],
            ["by",        0,              BY],
            ["case",      Beginner,       CASE],
            ["class",     0,              CLASS],
            ["create",    Beginner,       CREATE],
            ["default",   Beginner,       DEFAULT],
            ["do",        0,              DO],
            ["else",      0,              ELSE],
            ["end",       Beginner,       END],
            ["every",     Beginner,       EVERY],
            ["fail",      Beginner+Ender, FAIL],
            ["global",    0,              GLOBAL],
            ["if",        Beginner,       IF],
            ["import",    0,              IMPORT],
            ["initial",   Beginner,       iconINITIAL],
            ["initially", Ender,          INITIALLY],
            ["invocable", 0,              INVOCABLE],
            ["link",      0,              LINK],
            ["local",     Beginner,       LOCAL],
            ["method",    0,              METHOD],
            ["next",      Beginner+Ender, NEXT],
            ["not",       Beginner,       NOT],
            ["of",        0,              OF],
            ["package",   0,              PACKAGE],
            ["procedure", 0,              PROCEDURE],
            ["record",    0,              RECORD],
            ["repeat",    Beginner,       REPEAT],
            ["return",    Beginner+Ender, RETURN],
            ["static",    Beginner,       STATIC],
            ["suspend",   Beginner+Ender, SUSPEND],
            ["then",      0,              THEN],
            ["to",        0,              TO],
            ["until",     Beginner,       UNTIL],
            ["while",     Beginner,       WHILE]
            ] do
         t[entry[1]] := [entry[2], entry[3]]
      }
   return t
end

Lexical Analyzer Initialization and the Big Inhale A function, yylex_reinit(), is called the first time yylex() is called, along with each time the compiler moves to process a new file named on the command line. Along with initializing the public API variables, this function reads in the entire file, in a single global string variable, named "buffer". This allows extremely fast subsequent processing, which does no file I/O for each token, while avoiding complex buffering sometimes done to reduce file I/O costs in compilers. This "big-inhale" model did not work well on original 128K PDP-11 UNIX computers, but works well in this century. At present, the code assumes Unicon source files are less than a megabyte -- a lazy programmer's error. Although Unicon programs are much shorter than C programs, an upper limit of 1MB is bound to be reached someday.
# yylex_reinit() - reset the scanner state and load the input text.
# yylex() calls this when buffer is null on its first invocation.
procedure yylex_reinit()
   # reset the lexeme, position and flag variables
   yytext := lastchar := ""
   yylineno := 0
   yycolno := 1
   tokflags := 0
   # "Big inhale": all input goes into the global string buffer.  When yyin
   # is an open file, at most 1000000 characters are read from it; otherwise
   # yyin itself is used as the text.  If reads() fails, buffer is left
   # unchanged.
   buffer := if type(yyin) == "file" then reads(yyin, 1000000) else yyin
end

Semicolon Insertion Icon and Unicon insert semicolons for you automatically. This is an easy lexical analyzer trick. The lexical analyzer requires one token of lookahead. Between each two tokens, it asks: was there a newline? If yes, was the token before the newline one that could conceivably be the end of an expression, and was the token at the start of the new line one that could conceivably start a new expression? If it would be legal to do so, it saves the new token and returns a semicolon instead. This little procedure is entirely hidden from the regular lexical analyzer code by writing that regular code in a helper function yylex2(), and writing the semicolon insertion logic in a yylex() function that calls yylex2() when it needs a new token. Initialization for the yylex() function shows the static variables used to implement the one token of lookahead. If the global variable buffer doesn't hold a string anymore, /buffer will succeed and it must be that we are at end-of-file and should return 0.
procedure yylex()

3&5

static saved_tok, saved_yytext local rv, ender initial { if /buffer then yylex_reinit() } if /buffer then { if \debuglex then write("yylex() : 0") return 0 }

If we inserted a semicolon last time we were called, saved_tok will be the first token of the next line; we should return it.
if \saved_tok then { rv := saved_tok saved_tok := ll yytext := saved_yytext yylval := yytoken := token(rv, yytext, yylineno, yycolno, yyfilename) if \debuglex then write("yylex() : ",tokenstr(rv), "\t", image(yytext)) return rv }

Otherwise, we should obtain the next token by calling yylex2(). We have to check for end of file, remember if the last token could end an expression, call yylex2(), and update buffer to be the smaller string remaining after the token.
ender := iand(tokflags, Ender) tokflags := 0 if *buffer=0 then { buffer := ll if \debuglex then write("yylex() : EOFX") return EOFX } buffer ? { if rv := yylex2() then { buffer := tab(0) } else { buffer := ll yytext := "" if \debuglex then write("yylex() : EOFX") return EOFX } }

After fetching a new token, we have to decide whether to insert a semicolon or not. This is based on local variable ender (whether the previous token could end an expression) and global variable tokflags (which holds both whether the current token could begin an expression, and whether a newline occurred between the last token and the current token). iand() is a bitwise AND, equivalent to the C language & operator, used to pick bits out of a set of boolean flags encoded as bits within an integer.

3&

if ender~=0 & iand(tokflags, Beginner)~=0 & iand(tokflags, Newline)~=0 then { saved_tok := rv saved_yytext := yytext yytext := ";" rv := SEMICOL }

Returning a token requires allocation of a token() record instance, which is stored in a global variable.
yylval := yytoken := token(rv, yytext, yylineno, yycolno, yyfilename) if \debuglex then write("yylex() : ", tokenstr(rv), "\t", image(yytext)) return rv end

The Real Lexical Analyzer Function, yylex2() This function maintains a table of functions, calling a helper function depending on what the first character in the token is.
procedure yylex2()
   static punc_table
   initial {
      init_csets()
      # reswords() "creates and becomes" the reserved-word table: after this
      # assignment the global holds the table, not the procedure.
      reswords := reswords()
      # Dispatch table keyed by the first character of a token.  Characters
      # without an entry get the default, uni_error.
      punc_table := table(uni_error)
      punc_table["'"] := do_literal
      punc_table["\""] := do_literal
      punc_table["!"] := do_bang
      punc_table["%"] := do_mod
      punc_table["&"] := do_and
      punc_table["*"] := do_star
      punc_table["+"] := do_plus
      punc_table["-"] := do_minus
      punc_table["."] := do_dot
      punc_table["/"] := do_slash
      punc_table[":"] := do_colon
      punc_table["<"] := do_less
      punc_table["="] := do_equal
      punc_table[">"] := do_greater
      punc_table["?"] := do_qmark
      punc_table["@"] := do_at
      punc_table["\\"] := do_backslash
      punc_table["^"] := do_caret
      punc_table["|"] := do_or
      punc_table["~"] := do_tilde
      punc_table["("] := do_lparen
      punc_table[")"] := do_rparen
      punc_table["["] := do_lbrack
      punc_table["]"] := do_rbrack
      punc_table["{"] := do_lbrace
      punc_table["}"] := do_rbrace
      punc_table[","] := do_comma
      punc_table[";"] := do_semi
      punc_table["$"] := do_dollar
      every punc_table[!&digits] := do_digits
      every punc_table["_" | !&letters] := do_letters
      }

The main lexical analyzer code strips comments and whitespace, and calls the function table for the first non-whitespace character it finds. Note support for #line directives, and the use of string scanning.
yycolno +:= *yytext repeat { if pos(0) then fail if ="#" then { if ="line " then { if yylineno := integer(tab(many(&digits))) then { =" \"" yyfilename := tab(find("\"")|0) } } tab(find("\n") | 0) next } if ="\n" then { yylineno +:= 1 yycolno := 1 if tokflags < Newline then tokflags +:= Newline next } if tab(any(' ')) then { yycolno +:= 1; next } if tab(any('\v\^l')) then { next } if tab(any('\t')) then { yycolno +:= 1 while (yycolno-1) % 8 ~= 0 do yycolno +:= 1 next } yytext := move(1) return punc_table[yytext]() } end

The functions in the punctuation table select integer codes and match the rest of the lexeme. do_comma() illustrates an unambiguous token selection, while do_plus() illustrates a more common case where the "+" character could start any of 5 different tokens depending on the character(s) that follow it. Tokens starting with "letters" are looked up in a reserved words table, which tells whether they are special, or just a variable name.
# do_comma() - "," is always the COMMA token.
procedure do_comma()
   return COMMA
end

# do_plus() - finish a token whose first character is "+".  The matched
# characters are appended to yytext; the result is one of five categories.
procedure do_plus()
   if yytext ||:= =":" then {
      if yytext ||:= ="=" then { return AUGPLUS }      # +:=
      return PCOLON                                     # +:
      }
   if yytext ||:= ="+" then {
      if yytext ||:= =":=" then { return AUGUNION }     # ++:=
      return UNION                                      # ++
      }
   tokflags +:= Beginner        # a lone "+" may begin an expression
   return PLUS
end

# do_letters() - finish an identifier or reserved word and return its
# category.  The word's flags from the reserved-word table are added to
# tokflags.
procedure do_letters()
   # Declared local on purpose: an undeclared x would bind to a global x
   # if any linked file declared one.
   local x
   yytext ||:= tab(many(idchars))
   x := reswords[yytext]        # [tokenflags, category]
   tokflags +:= x[1]
   return x[2]
end

26.3 The Unicon Parser


Unicon's parser is written using a YACC grammar; a graduate student (Ray Pereda) modified Berkeley's public domain version of YACC (byacc) to generate Unicon code, following in the footsteps of someone who had earlier modified it to generate Java. The Unicon parser lives in uni/unicon/unigram.y in the source distribution (22kB, 700 lines, 119 terminals, 71 nonterminals). Unicon's YACC grammar was obtained by copying the Icon grammar, and adding Unicon syntax constructs. Prior to this time the object-oriented dialect of Icon was called Idol and really was a line-oriented preprocessor instead of a compiler. The start symbol for the grammar is named program, and the semantic action code fragment for this nonterminal calls the rest of the compiler (semantic analysis and code generation) directly on the root of the syntax tree, rather than storing it in a global variable for the main() procedure to examine.
program : decls EOFX { Progend($1);} ;

Many context free grammar rules are recursive, with an empty production to terminate the recursion. The rule for declarations is typical:
decls : { $$ := EmptyNode } | decls decl { if yynerrs = 0 then iwrites(&errout,".") $$ := node("decls", $1, $2) } ;

The "semantic action" (code fragment) for every production rule builds a syntax tree node and assigns it to $$ for the nonterminal left-hand side of the rule. Another common grammar pattern is a production rule that has many different alternatives, such as the one for individual declarations:
decl    : record
        | proc
        | global
        | link
        | package
        | import
        | invocable
        | cl
        ;

3&#

For such "unary" productions, the child's syntax tree node suffices for the parent; no new tree node is needed. Some nonterminals mostly correspond to a specific sequence of terminals, as is the case for package references:
packageref : IDENT COLONCOLON IDENT { $$ := node("packageref", $1,$2,$3) } | COLONCOLON IDENT { $$ := node("packageref", $1,$2) } ;

The lexical analyzer has already constructed a valid "leaf" for each terminal symbol, so if a production rule has only one terminal symbol in it, for a syntax tree we can simply use the leaf for that nonterminal (for a parse tree, we would need to allocate an extra unary internal node):
lnkfile : IDENT ; | STRINGLIT ;

The expressions (which comprise about half of the grammar) use a separate nonterminal for each level of precedence instead of YACC's declarations for handling precedence (%left, %right, etc). The Icon and Unicon grammars approach 20 levels of nonterminals. A typical rule looks like:
expr6   : expr7 ;
        | expr6 PLUS expr7 { $$ := node("Bplus", $1,$2,$3);} ;
        | expr6 DIFF expr7 { $$ := node("Bdiff", $1,$2,$3);} ;
        | expr6 UNION expr7 { $$ := node("Bunion", $1,$2,$3);} ;
        | expr6 MINUS expr7 { $$ := node("Bminus", $1,$2,$3);} ;

The "B" stands for "binary", to distinguish these operators from their unary brethren. The 20 levels of nonterminals approach is inherited from Icon and probably makes the parser larger than it has to be, but taking these nonterminals out doesn't seem to help much. Syntax Error Handling Icon employed a relatively clever approach to doing syntax error messages with YACC -- the parse state at the time of error is enough to do fairly good diagnoses. But, every time the grammar changed, the parse state numbers could change wildly. For Unicon I developed the Merr tool, which associates parse error example fragments with the corresponding diagnostic error message, and detects/infers the parse state for you, reducing the maintenance problem when changing the grammar. Merr also considers the current input token in deciding what error message to emit, making it fundamentally more precise than Icon's approach.

26.4 The Unicon Preprocessor


The Icon language originally did not include any preprocessor, but eventually, a simple one was introduced, with ability to include headers, define symbolic constants (macros without parameters), and handle conditional compilation (ifdef). The preprocessor implementation in Unicon was written by Bob Alexander, and came to Unicon by way of Jcon, an Icon-to-JVM translator. This preprocessor is written in a single 600+ line file, uni/unicon/preproce.icn.

31&

The external public interface of the preprocessor is line-oriented, consisting of a generator preprocessor(filename, predefinedsyms) which suspends each line of the output, one after another. Its invocation from the main() procedure looks like:
yyin := "" every yyin ||:= preprocessor(fName, uni_predefs) do yyin ||:= "\n"

Since the preprocessor outputs line-by-line, there is a mismatch between it and the lexical analyzer's big-inhale model. The preprocessor could be modified to fit better with the lexical analyzer or vice versa. The preprocessor function takes the filename to read from, along with a table of predefined symbols which allows the preprocessor to respond to lines like
$ifdef _SQL

based on what libraries are available and how Unicon was built on a given platform. The preprocessor() function itself starts each call off with initializations:
static nonpunctuation initial { nonpunctuation := &letters ++ &digits ++ ' \t\f\r' } preproc_new(fname,predefined_syms)

The initialization code opens fname, creates empty stacks to keep track of nested $ifdef's and $include's, initializes counters to 0 and so forth. The preprocessor is line-oriented. For each line, it looks for a preprocessor directive, and if it does not find one, it just scans for symbols to replace and returns the line. The main loop looks like
while line := preproc_read() do line ? { preproc_space() # eat whitespace if (="#" & match("line")) | (="$" & any(nonpunctuation)) then { suspend preproc_scan_directive() } else { &pos := 1 suspend preproc_scan_text() } }

The procedures preproc_scan_directive() and preproc_scan_text() work on special and ordinary lines, respectively. The line is not a parameter because it is held in the current string scanning environment. The preproc_scan_directive() starts by discarding whitespace and identifying the first word on the line (which must be a valid preprocessor directive). A case expression handles the various directives (define, undef, ifdef, etc.). Defined symbols are stored in a table. $ifdef and $ifndef are handled using a global variable preproc_if_state to track the boolean conditions. A count of $ifdef's is maintained, in order to handle matching endif's. Include files are handled using a stack, but an additional set of filenames is kept to prevent infinite recursion when files include each other. When a new include directive is encountered it is checked against the preproc_include_set and if OK, it is opened. The including file (and its associated name, line, etc) are pushed onto a list named

311

preproc_file_stack. It is possible to run out of open files under this model, although this is not easy under modern operating systems. Include files are searched on an include file path, consisting of a list of directories given on an optional environment variable (LPATH) followed by a list of standard directories. The standard directories are expected to be found relative to the location of the virtual machine binaries. The procedure preproc_scan_text() has the relatively simple job of replacing any symbols by their definitions within an ordinary source line. Since macros do not have parameters, it is vastly simpler than in a C preprocessor. The main challenges are to avoid macro substitutions when a symbol is in a comment or within quotes (string or cset literals). An additional issue is to handle multiline string literals, which occur in Icon when a string literal is not closed on a line, and instead the line ends with an underscore indicating that it is continued on the next line. Skipping over quoted text sounds simple, but is trickier than it looks. Escape characters mean you can't just look for the closing quote without considering what comes before it, and you can't just look at the preceding character since it might have been escaped, as in "\\". The code looks similar to:
repeat { while tab(upto('"\\')) do { case move(1) of { "\\": move(1) default: { break break } } } # ... if not match("_",,-1) then break &subject := preproc_read() | fail # ... }

The code in preproc_read() for reading a line does a regular Icon read(); end of file causes the preprocessor file_stack to be popped for the previous file's information. Performance has not been perceived as a significant problem, but it would be interesting to convert preproc_read() to use a big-inhale model to see if any statistical difference could be observed. When an include is encountered under a big-inhale, the saved state would contain the string of remaining file contents, instead of the open file value.

26.5 Semantic Analysis


The Unicon translator's semantic analysis is minimal, and revolves mainly around object-oriented features such as inheritance and package imports. Before we can look at those things, we need to look at the syntax tree structure. In conventional YACC, a %union declaration is necessary to handle the varying types of objects on the value stack including the type used for syntax tree nodes, but iyacc has no need of this awkward mechanism: the value stack like all structure types can hold any type of value in each slot. Similarly, tree nodes can hold children of any type, potentially eliminating any awkwardness of mixing tokens and internal nodes. Of course, you do still have to check what kind of value you are working with.

312

Parse Tree Nodes uni/unicon/tree.icn contains procedures to handle the syntax tree node data type, including both the following declaration and the yyprint() traversal function we'll be discussing in today's lecture.
record treenode(label, children)

holds one node worth of information. For convenience, a procedure node(label, kids[]) takes an arbitrary number of parameters and constructs the list of children for you. Leaves have a null children field. "Code Generation" in the Unicon Translator In a regular preprocessor, there is no code generation; there is a text-filter model in which the preprocessor writes out (modified) versions of the lines it reads in. In the Unicon translator, the code that is written out is produced by a traversal of the syntax tree. The same technique might be used by a "pretty printer". We will explore this aspect of the Unicon translator as the best available demonstration of working with Unicon syntax trees. Later on we will consider more "real" code generation in the virtual machine and the optimizing compiler. Earlier we saw that the start symbol of the Unicon grammar had a semantic action that called a procedure Progend(). We will cover most of that procedure next week since it is all about object-orientation, but at the end of Progend(), a call to yyprint() performs the tree traversal for code generation. A classic tree traversal pattern would look like:
procedure traverse(node) if node is an internal node { every child := ! node.children do traverse(child) generate code for this internal node (postfix) } else generate code for this leaf end

The code generator traversal yyprint() is a lot more complicated than that, but fits the general pattern. The main work done at various nodes is to write some text to the output file, yyout. Most ordinary internal nodes are of type treenode as described above. But because there are several kinds of internal nodes and several kinds of leaves, the "if node is an internal node" is implemented as a case expression. Besides a regular treenode, the other kinds of internal nodes are objects of type declaration, class, and argument list. For regular treenodes, another case expression on the node's label field is used to determine what kind of code to generate, if any, besides visiting children and generating their code. The default behavior for an internal node is to just visit the children, generating their code. For ordinary syntax constructs (if, while, etc.) this works great and a copy of the code is written out, token by token. But several exceptions occur, mainly for the pieces of Unicon syntax that extend Icon's repertoire. For example, packages and imports are not in Icon and require special treatment.
procedure yyprint(node) static lasttok case type(node) of { "treenode" : { case node.label of { "package": { } # handled by semantic analysis "import": { print_imports(node.children[2]) }

313

# implement packages via name mangling "packageref": { if *node.children = 2 then yyprint(node.children[2]) # ::ident else { # ident :: ident yyprint(node.children[1]) writes(yyout, "__") outcol +:= ((* writes(yyout, node.children[3].s)) + 2) } }

New syntax constructs such as procedure parameter defaults and type restrictions, and variable initializers, are other examples where the default traversal would output things illegal in Icon. They are implemented by skipping some of the children (assignment and value) in the regular pass, and adding extra code elsewhere, discussed below.
"varlist2"|"stalist2": { yyprint(node.children[1]) } "varlist4"|"stalist4": { yyprint(node.children[1]) yyprint(node.children[2]) yyprint(node.children[3]) }

Much of this special logic is orchestrated by the code for traversing a procedure; it can visit its arguments and variable declarations and apply special rules to them.
"proc": { yyprint(node.children[1]) every yyprint(node.children[2 to 3]) if exists_statlists(node.children[3]) then { ini := node.children[4] yyprint("\ninitial {") if ini ~=== EmptyNode then { # append into existing initial yyprint(ini.children[2]) yyprint(";\n") } yystalists(node.children[3]) yyprint("\n}\n") } else every yyprint(node.children[4]) (node.children[1].fields).coercions() yyvarlists(node.children[3]) yyprint(node.children[5]) yyprint(node.children[6]) }

The default behavior of visiting one's children is very simple, as is the handling of other kinds of internal nodes, which are objects. For the objects, a method Write() is invoked.
"error": fail default: every yyprint(!node.children) } "declaration__state" | "Class__state" | "argList__state": node.Write(yyout)

314

The outer case expression of yyprint() continues with various kinds of leaf (token) nodes. These mainly know how to write their lexemes out. But, a lot of effort is made to try to keep line and column number information consistent. Variables outline and outcol are maintained as each token is written out. Integers and string literals found in the syntax tree are written out as themselves. Since they have no attached lexical attributes, they are a bit suspect in terms of maintaining debugging consistency. It turns out the reason they occur at all, and the reason they have no source lexical attributes, is that artificial syntax subtrees are generated to handle certain object-oriented constructs, and within those subtrees strings and integers may be placed, which do not correspond to anywhere in the source code.
"integer": { writes(yyout, node); outcol +:= *string(node) } "string": { node ? { while writes(yyout, tab(find("\n")+1)) do { outline+:=1; outcol:=1; } node := tab(0) } writes(yyout, node); outcol +:= *node }

"Normally", tokens are written out at exactly the line and column they appear at in the source code. But a myriad of constructs may bump them around. If the output falls behind (in lines, or columns) extra whitespace can be inserted to stay in sync. If output gets ahead by lines, a #line directive can back it up, but if output gets ahead by columns, there is nothing much one can do, except make sure subsequent tokens don't accidentally get attached/concatenated onto earlier tokens. This occurs, for example, when the output code for an object-oriented construct in an expression is longer than the source expression, perhaps due to name mangling. Specific token combinations are checked, but the list here may be incomplete (possible BUG?). For source tokens, not only might the line and column change, the filename could be different as well.
"token": { if outfilename ~== node.filename | outline > node.line then { write(yyout,"\n#line ", node.line-1," \"", node.filename,"\"") outline := node.line outcol := 1 outfilename := node.filename } while outline < node.line do { write(yyout); outline +:= 1; outcol := 1 } if outcol >= node.column then { # force space between idents and reserved words, and other # deadly combinations (need to add some more) if ((\lasttok).tok = (IDENT|INTLIT|REALLIT) & reswords[node.s][2]~=IDENT)| (((\lasttok).tok = NMLT) & (node.tok = MINUS)) | ((\lasttok).tok = node.tok = PLUS) | ((\lasttok).tok = node.tok = MINUS) |

315

((reswords[(\lasttok).s][2]~=IDENT) & (node.tok=(IDENT|INTLIT|REALLIT)))| ((reswords[(\lasttok).s][2]~=IDENT) & (reswords[node.s][2]~=IDENT)) then writes(yyout, " ") } else while outcol < node.column do { writes(yyout, " "); outcol +:= 1 }

Most tokens' lexemes are finally written out by writing node.s:


writes(yyout, node.s) outcol +:= *node.s lasttok := node } "null": { } default: write("its a ", type(node)) } end

Keywords Besides the large set of interesting reserved words, Icon and Unicon have another set of predefined special words called keywords. These words are prefixed by an ampersand, for example, &subject holds the current "subject" string being examined by string scanning. A procedure Keyword(x1,x2) semantically checks that an identifier following a unary ampersand is one of the valid keyword names. The valid names are kept in a set data structure.

26.6 Object Oriented Facilities


Unicon features classes, packages, and a novel multiple inheritance mechanism. These items are implemented entirely within the Unicon translator. The Icon virtual machine thus far has only the slightest of extensions for object-orientation; specifically, the dot operator has been extended to handle objects and method invocation. The Unicon OOP facilities were originally prototyped as a semester class project in a "special topics" graduate course. Writing the prototype in a very high-level language like Icon, and developing it as a preprocessor with name mangling, allowed the initial class mechanism to be developed in a single evening, and a fairly full, usable system with working inheritance to be developed in the first weekend. By the end of the semester, the system was robust enough to write it in itself, and it was released to the public shortly afterwards as a package for Icon called "Idol". Many many improvements were made after this point, often at the suggestion of users. An initial design goal was to make the absolute smallest additions to the language that were necessary to support object-orientation. Classes were viewed as a version of Icon's record data type, retaining its syntax for fields (member variables), but appending a set of associated procedures. Because records have no concept of public and private, neither did classes. Another graduate student criticized this lack of privacy, and for several versions, everything was made private unless an explicit public keyword was used. But eventually support for privacy was dropped on the grounds that it added no positive capabilities and was un-Iconish. The existence of classes with hundreds of "getter" and "setter" methods was considered a direct proof that "private" was idiotic in a rapid prototyping language.

31

The Code Generation Model for Classes "unicon -E foo" will show you what code is generated for Unicon file foo.icn. If foo.icn contains classes, you can enjoy the code generation model and experiment to see what it does under various circumstances. As a first example, consider
class A(x,y) method m() write("hello") end end

These five lines generate 25 lines for Icont to translate into virtual machine code. The first two lines are line directives showing from whence this source code originated:
#line 0 "/tmp/uni13804206" #line 0 "a.icn"

Global declarations (including procedures) would be passed through the preprocessor pretty nearly intact, but for the class, we get a bunch of very different code. Methods are written out, with names mangled to a classname_methodname format.
procedure A_m(self)

#line 2 "a.icn" write("hello"); end

Two record types are defined, one for the class instances and one for the "methods vector", or "operation record". The methods vector is instantiated exactly once in a global variable in classname__oprec format.
record A__state(__s,__m,x,y) record A__methods(m) global A__oprec

The default constructor for a class takes fields as parameters and uses them directly for initialization purposes. The first time it is called, a methods vector is created. Instances are given a pointer to themselves in an __s field (mainly for historical reasons) and to the methods vector in an __m field. Current NMSU grad student Sumant Tambe did an independent study project to get rid of __s and __m with partial success, but his work is not finished or robust enough to be enabled by default.
# Generated constructor for class A.  The instance record is created with
# a null __s field, which is then set to the instance itself.
procedure A(x,y)
   local self,clone
   initial {
      if /A__oprec then Ainitialize()
      }
   self := A__state(&null,A__oprec,x,y)
   self.__s := self
   return self
end

# Generated one-time initialization: builds the methods vector for class A.
procedure Ainitialize()
   initial A__oprec := A__methods(A_m)
end

31!

Symbols and Scope Resolution One of the basic aspects of semantic analysis is: for each variable, where was it declared, so we can identify its address, etc. Unicon inherits from Icon the curious convenience that variables do not have to be declared: they are local by default. This feature is implemented by deferring the local vs. global decision until link time, so the Unicon translator has no local vs. global issues. Class variables, however, have to be identified, and looked up relative to the implicit "self" variable. A family of procedures in uni/unicon/tree.icn with names starting "scopecheck" go through the syntax tree looking for such class variables. Like most tree traversals, this is a recursive process, and since local and parameter declarations override class variables, there are helper functions to walk through subtrees building mini-symbol tables such as local_vars in scopecheck_proc(node):
# Build local_vars from the params and local var expressions. local_vars := set() extract_identifiers(node.children[1].fields, local_vars) extract_identifiers(node.children[3], local_vars)

Eventually, every identifier in every expression is checked against local_vars, and if not found there, against the class variables stored in a variable self_vars:
self_vars := set() every insert(self_vars, every insert(self_vars, every insert(self_vars, every insert(self_vars, c.foreachmethod().name) c.foreachfield()) (!c.ifields).ident) (!c.imethods).ident)

For an IDENT node, the tests boil down to:


if node.tok = IDENT then { if not member(\local_vars, node.s) then { if member(\self_vars, node.s) then node.s := "self." || node.s else node.s := mangle_sym(node.s) } }

Undeclared locals and globals are mangled to include the current package name if there is one. Inheritance Inheritance means: creating a class that is similar to an existing class. In object-oriented literature there is "abstract inheritance" in which a class supports all the same operations with the same signatures, and there is concrete inheritance in which actual code is shared. Early object-oriented languages supported only concrete inheritance, while more recent languages tend to discourage it. Unicon is not typed at compile time, so abstract inheritance is not a big deal. There are abstract methods, and classes whose every method is abstract, but the use of abstract is mainly for documentation: subclass authors must provide certain methods. Anyhow, the syntax of inheritance in Unicon is
class subclass : super1 : super2 : ... ( ...fields... )

The semantics of inheritance, and particularly of multiple inheritance, are interesting in Unicon; the implementation is relatively simple. An example of inheritance is given by class Class, from uni/unicon/idol.icn
class declaration(name,fields,tag,lptoken,rptoken) ...

31"

end ... class Class : declaration (supers, methods, text, imethods, ifields, glob, linkfile, dir, unmangled_name, supers_node)

A unique perspective on inheritance in Unicon comes from the actual acquisition of inherited data fields and methods by the subclass. Some object-oriented languages do this inheritance "by aggregation", creating a copy of the superclass in the subclass. This is fine, but it makes "overriding" an anomaly, when overriding the parent with new/different behavior is entirely routine. Unicon instead inherits by the child looking for things in the parent (and the parent's parent, etc.) that it doesn't already have. In the above example, class Class effectively appends 5 fields from class declaration onto the end of its field list. The generated code for instances looks like
record Class__state(__s,__m, supers,methods,text,imethods,ifields, glob,linkfile,dir,unmangled_name,supers_node, name,fields,tag,lptoken,rptoken)

The inheritance semantics is called "closure based" because the process of looking for things to add from parent superclasses iterates until no new information can be added, after which the subclass is said to be closed on its parents. Other forms of closure appear frequently in CS. Implementing Multiple Inheritance in Unicon The actual code in the Unicon translator is, by analogy to transitive closure, looking for things to inherit via a depth-first traversal of the inheritance graph. Multiple inheritance can be separated out into two portions: 1. a method transitive_closure() that finds all superclasses and provides a linearization of them, flattening the graph into a single ordered list of all superclasses 2. a method resolve() that walks the list and looks for methods and fields to add. Method transitive_closure() is one of the cleaner demonstrations of why Unicon is a fun language in which to write complex algorithms. It is walking through a class graph, but by the way it is not recursive.
method transitive_closure() count := supers.size() while count > 0 do { added := taque() every sc := supers.foreach() do { if /(super := classes.lookup(sc)) then halt("class/transitive_closure: _ couldn't find superclass ",sc) every supersuper := super.foreachsuper() do { if / self.supers.lookup(supersuper) & /added.lookup(supersuper) then {

31#

added.insert(supersuper) } } } count := added.size() every self.supers.insert(added.foreach()) } end

Now, given what I've said about Unicon providing a depth-first inheritance hierarchy semantics, what is wrong with this picture? The code is stable and hasn't needed changes in several years, so I am not fishing for syntax bugs, or claiming that there is a bug. But there is something odd. A chocolate "peanut butter cup" is available in my office for the first correct description of the problem. The method resolve() within class Class finds the inherited fields and methods from the linearized list of superclasses.
# # resolve -- primary inheritance resolution utility # method resolve() # # these are lists of [class , ident] records # self.imethods := [] self.ifields := [] ipublics := [] addedfields := table() addedmethods := table() every sc := supers.foreach() do { if /(superclass := classes.lookup(sc)) then halt("class/resolve: couldn't find superclass ",sc) every superclassfield := superclass.foreachfield() do { if /self.fields.lookup(superclassfield) & /addedfields[superclassfield] then { addedfields[superclassfield] := superclassfield put ( self.ifields , classident(sc,superclassfield) ) if superclass.ispublic(superclassfield) then put( ipublics, classident(sc,superclassfield) ) } else if \strict then { warn("class/resolve: '",sc,"' field '",superclassfield, "' is redeclared in subclass ",self.name) } } every superclassmethod := (superclass.foreachmethod()).name() do { if /self.methods.lookup(superclassmethod) & /addedmethods[superclassmethod] then { addedmethods[superclassmethod] := superclassmethod put ( self.imethods, classident(sc,superclassmethod) ) }

32&

} every public := (!ipublics) do { if public.Class == sc then put (self.imethods, classident(sc,public.ident)) } } end

Class and Package Specifications In the "old days" of Unicon's ancestor Idol, you could only inherit from a class that appeared in the same source file. Anything else poses a librarian's problem of identifying from what file to inherit. Java, for instance, takes a brute-force approach of one class per file. Unicon generates in each source directory an NDBM database (named uniclass.dir and uniclass.pag) that includes a mapping from class name to: what file the class lives in, plus, what superclasses, fields, and methods appear in that class. From these specifications, "link" declarations are generated for superclasses within subclass modules, plus the subclass can perform inheritance resolution. The code to find a class specification is given in idol.icn's fetchspec(). A key fragment looks like
if f := open(dir || "/" || env, "dr") then { if s := fetch(f, name) then { close(f) return db_entry(dir, s) } close(f) }

Unicon searches for "link" declarations in a particular order, given by the current directory followed by directories in an IPATH (Icode path, or perhaps Icon path) environment variable, followed by system library directories such as ipl/lib and uni/lib. This same list of directories is searched for inherited classes. The string stored in uniclass.dir and returned from fetch() for class Class is:
idol.icn class Class : declaration(supers,methods,text,imethods,ifields,glob,linkfile,dir,unmangled_name,supers_node) ismethod isfield Read ReadBody has_initially ispublic foreachmethod foreachsuper foreachfield isvarg transitive_closure writedecl WriteSpec writemethods Write resolve end

321

Unicon's Progend() revisited Having presented scope resolution, inheritance, and importing packages and inheriting classes from other files via the uniclass.dir NDBM files, we can finally show the complete semantic analysis in the Unicon compiler, prior to writing out the syntax tree as Icon code:
procedure Progend(x1) package_level_syms := set() package_level_class_syms := set() set_package_level_syms(x1) scopecheck_superclass_decs(x1) outline := 1 outcol := 1 # # export specifications for each class # native := set() every cl := classes.foreach_t() do { cl.WriteSpec() insert(native, cl) } # # import class specifications, transitively # repeat { added := 0 every super := ((classes.foreach_t()).foreachsuper() | ! imports) do { if /classes.lookup(super) then { added := 1 readspec(super) cl := classes.lookup(super) if /cl then halt("can't inherit class '",super,"'") iwrite(" inherits ", super, " from ", cl.linkfile) writelink(cl.dir, cl.linkfile) outline +:= 1 } } if added = 0 then break } # # Compute the transitive closure of the superclass graph. Then # resolve inheritance for each class, and use it to apply scoping rules. # every (classes.foreach_t()).transitive_closure() every (classes.foreach_t()).resolve() scopecheck_bodies(x1) if \thePackage then { every thePackage.insertsym(!package_level_syms) } #

322

# generate output # yyprint(x1) write(yyout)

Other OOP Issues The primary mechanisms for object-oriented programming that we have discussed so far include: classes, method invocation, inheritance. There were certainly a few parts we glossed over (like how a$super.m() is implemented.) The main way to look for additional issues we skipped is to read uni/unicon/idol.icn, which handles all the object-oriented features and comes from the original Idol preprocessor. Here are some thoughts from a scan of idol.icn:

The preprocessor semi-parsed class and method headers in order to do inheritance. After the real (YACC-based) parser was added, I hoped to remove the parsing code, but it is retained in order to handle class specifications in the uniclass.dir NDBM files. The classes in idol.icn correspond fairly directly to major syntax constructs; the compiler itself is object-oriented. Packages are a "virtual syntax construct": no explicit representation in the source, but stored in the uniclass.dir database. There is a curious data structure, a tabular queue, or taque, that combines (hash) table lookup and preserves (lexical) ordering. Aggregation and delegation patterns are used a lot. A class is an aggregate of methods, fields, etc. and delegates a lot of its work to objects created for subparts of its overall syntax.

An Aside on Public Interfaces and Runtime Type Checking Object-oriented facilities are usually discussed in the context of large complex applications where software engineering is an issue. We don't usually need OOP for 100 line programs, but for 10,000+ line programs it is often a big help. Besides classes and packages, Unicon adds to Icon one additional syntax construct in support of this kind of program: type checking and coercion of parameters. Parameters and return values are the points at which type errors usually occur, during an integration phase in a large project where one person's code calls another's. The type checking and coercion syntax was inspired by the type checks done by the Icon runtime system at the boundary where Icon program code calls the C code for a given function or operator. One additional comment about types is that the lack of types in declarations for ordinary variables such as "local x" does not prevent the Icon compiler iconc from determining the exact types of well over 90% of uses at compile time using type inference. Type checking can generally be done at compile time even if variable declarations do not refer to types... as long as the type information is available across file and module boundaries.

323

Chapter 26: Portable 2D and 3D Graphics


Graphics facilities in Unicon Version 11 are a large component of the Unicon language. Version 11 introduces a powerful set of 3D facilities. This document describes the design and implementation internals of the 2D and 3D graphics facilities and their window system implementation. It is intended for persons extending the graphics facilities or porting Unicon to a new window system. This chapter is derived from Unicon Technical Report #5a, The Implementation of Graphics in Unicon Version 11, by Clint Jeffery and Naomi Martinez.

26.1 Window Systems and Platform-Independence


This chapter describes the internals of the implementation of Unicon's graphics and window system facilities. Much of the code is devoted to hiding specific features of C graphics interfaces that were deemed overly complex or not worth the coding effort they entail. Other implementation techniques are motivated by portability concerns. The graphics interface described below has been implemented to various levels of completeness on the X Window System, Microsoft Windows, OS/2 Presentation Manager, and Macintosh platforms. Most of this discussion is relevant also for Icon Version 9.4; Unicon's graphics facilities include minor improvements. Relevant Source File Summary This document assumes a familiarity with the general organization and layout of Unicon sources and the configuration and installation process. For more information on these topics, consult Icon Project Documents IPD 238 [TGJ96] and IPD 243 [TGJ98] for UNIX, and Appendix B of this document for MS Windows. Unicon's window facilities consist of several source files, all in the runtime directory unless otherwise noted. They are discussed in more detail later in this document. header files -- h/graphics.h contains structures and macros common across platforms. Each platform adds platform-specific elements to the common window structures defined in this file. In addition, each platform gets its own header file; currently these consist of X Windows (h/xwin.h), Microsoft Windows (h/mswin.h), OS/2 Presentation Manager (h/pmwin.h), and the Macintosh (h/mac.h). Every platform defines several common macros in the window-system specific header file in addition to its window system specific structures and macros. The common macros are used to insert platform-dependent pieces into platform-independent code. Unicon functions -- fwindow.r contains the RTL (Run-Time Language) interface code used to define built-in graphics functions for the Unicon interpreter and compiler. For most functions, this consists of type checking 
and conversion code followed by calls to platform-dependent graphics functions. The platform dependent functions are described later in this document; fwindow.r is platform independent. You will generally modify it only if you are adding a new built-in function. For example, the Windows native functions are at the bottom of this file. internal support routines -- rwindow.r, rwinrsc.r, rgfxsys.r and rwinsys.r are basically C files with some window system dependencies but mostly consisting of code that is used on all systems. For example, rwindow.r is almost 100 kilobytes of portable source code

324

related to Unicon's event model, attribute/value system, portable color names, GIF and JPEG image file support, palettes, fonts, patterns, spline curves and so forth. window-system specific files -- Each window system gets its own source files for C code, included by the various r*.r files in the previous section. Currently these include rxwin.ri and rxrsc.ri for X Window, rmswin.ri for MS Windows, rpmwin.ri, rpmrsc.ri, and rpmgraph.ri for Presentation Manager, and rmac.ri for the Macintosh. Each platform will implement one or more such r*.ri files. In addition, common/xwindow.c contains so many X Window includes that it won't even compile under UNIX Sys V/386 R 3.2 if all of the Unicon includes are also present -- so it's a .c file instead of a .r file. tainted "regular" Unicon sources -- Many of the regular Unicon source files include code under #ifdef Graphics and/or one or more specific window system definitions such as #ifdef XWindows or #ifdef PresentationManager. The tainted files that typically have to be edited for a new window system include h/grttin.h, h/features.h, h/rexterns.h, h/rmacros.h, h/rproto.h, h/rstructs.h, and h/sys.h. Other files also contain Graphics code. This means that most of the system has to be recompiled with rtt and cc after Graphics is defined in h/define.h. You will also want to study the Graphics stuff in h/grttin.h since several profound macros are there. Also, any new types (such as structures) defined in your window system include files will need dummy declarations (of the form typedef int foo;) to be added there. Under UNIX the window facilities are turned on at configuration time by typing make X-Configure name=system instead of the usual make Configure invocation. The X configuration modifies makefiles and defines the symbolic constant Graphics in h/define.h. If OpenGL libraries are detected, configuration enables them automatically. Similar but less automatic configuration handling is performed for other systems; for example, 
an alternate .bat file is used in place of os2.bat or turbo.bat. Graphics #define-d symbols The primary, window-system-independent defined symbol that turns on window facilities is simply Graphics. Underneath this parent #ifdef, the symbol XWindows is meant to mark all X Window code. Other window systems have a definition comparable to XWindows: for Microsoft Windows it is MSWindows, for OS/2 it is PresentationManager, and for the Macintosh, MacGraph. Turning on any of the platform specific graphics #define symbols turns on Graphics implicitly.

26.2 Structures Defined in graphics.h


The header file graphics.h defines a collection of C structures that contain pointers to other C structures from graphics.h as well as pointers into the window system library structures. The internals for the simplest Unicon window structure under X11 are depicted in Figure 1. The picture is slightly simpler under MS Windows, with no display state or related color or font management; on the other hand MS Windows maps the Unicon context onto a large set of resources, including pens, brushes, fonts and bitmaps.

325

Figure 1: Internal Structure of a Unicon Window Value At the top, Unicon level, there is a simple structure called a binding that contains a pointer to a window state and a window context. Pointers to bindings are stored in the FILE * variable of the Unicon file structure, and most routines that deal with a window take a pointer to a binding as their first argument. Beneath this facade, several structures are accessed to perform operations on each window. The window state holds the typical window information (size, text cursor location, a Unicon list of events waiting to be read) as well as a window system pointer to the actual window, a pointer to a backing pixmap (a "compatible device context" used to handle redraw requests), and a pointer to the display state. The window context contains the current font, foreground, and background colors used in drawing/writing to the window. It also contains drawing style attributes such as the fill style. Contexts are separate from the window state so that they may be shared among windows. This is a big win, and Unicon programs tend to use it heavily, so in porting the window functions a central design issue must be the effective use of a comparable facility on other window systems, or emulating the context abstraction if necessary. Nevertheless, one might start out with Couple() and Clone() disabled and only allow one hardwired context associated with each window. The display state contains whatever system resources (typically pointers or handles) are shared among all the windows on a given display in the running program. For example, in X this includes the fonts, the colors, and a window system pointer for an internal Display structure required by all X library calls to denote the connection to the X server.

26.3 Platform Macros and Coding Conventions


Since the above structure is many layers deep and sometimes confusing, Unicon's graphics interface routines employ coding conventions to simplify things. In order to avoid many extra memory references in accessing fields in the multi-level structure, "standard" local variables are declared in most of the platform dependent interface routines in rxwin.ri and

32

rmswin.ri. The macro STDLOCALS(w) declares local variables pointing to the most commonly used pieces of the window binding, and initializes them from the supplied argument; each window system header should define an appropriate STDLOCALS(w) macro. Under some window systems, such as MS Windows, STDLOCALS(w) allocates resources which must be freed before execution continues, in which case a corresponding FREE_STDLOCALS(w) macro is defined. Some common standard locals are wc, ws, stdwin, and stdpix. While wc and ws are pointers to structures copied from the window binding, stdwin and stdpix are actual X (or MS Window) resources that are frequently supplied to the platform-dependent routines as arguments. Each window system will have its own standard locals. For example, MS Windows adds stddc and pixdc since it uses a device context concept not found in X11. In much of the platform-dependent source code, the window system calls are performed twice. This is because most platforms including X, MS Windows, and PresentationManager do not remember the contents of windows when they are reduced to iconic size or obscured behind other windows. When the window is once again exposed, it is sent a message to redraw itself. Unicon hides this entirely, and remembers the contents of the window explicitly in a window-sized bitmap of memory. The calling of platform graphics routines twice is so common that a set of macros is defined in xwin.h to facilitate it. The macros are named RENDER2 through RENDER6, and each of them takes an Xlib function and then some number of arguments to pass that function, and then calls that function twice, once on the window and once on the bitmap. Platforms that provide backing store may avoid this duplicated effort. In practice however it seems most window systems have redraw events even if they implement retained windows (for example, MGR [Uhler88]).

26.4 Window Manipulation in rxwin.ri and rmswin.ri


The platform-dependent calls in the Unicon run-time system can be categorized into several major areas:

- Window creation and destruction
- Low-level event processing
- Low-level text output operations
- Window and context attribute manipulation

Window Creation and Destruction A graphics window is created when the Unicon program calls open() with file attribute "g". The window opening sequence consists of a call to wopen() to allocate appropriate Unicon structures for the window and evaluate any initial window attributes given in additional arguments to open(). After these attributes have been evaluated, platform resources such as fonts and colors are allocated and the window itself is instantiated. Under X, wopen() busy-waits until the window has received its first expose event, ensuring that no subsequent window operation takes place before the window has appeared onscreen.

32!

A window is closed by a call to wclose(); this removes the on-screen window even if other bindings (Unicon window values) refer to it. A closed window remains in memory until all Unicon values that refer to it are closed. A call to unbind() removes a binding without necessarily closing the window. Event Processing The system software for each graphics platform has a huge number of different types of events. Unicon ignores most of them. Of the remainder, some are handled by the runtime system code in the .ri files implicitly, and some are explicitly passed on to the Unicon program. Most native graphic systems require that applications be event-driven; they must be tightly I/O bound around the user's actions. The interaction between user and program must be handled at every instant by the program. Unicon, on the other hand, considers this event-driven model optional. Making the event-driven model optional means that the Unicon interface must occasionally read and process events when the Unicon program itself is off in some other computation. In particular, keystrokes and mouse events must be stored until the user requests them, but exposure events and resizes must be processed immediately. The Unicon interpreter pauses at regular intervals in between its virtual machine instructions (the Unicon compiler emits polling code in its generated C code, so window system facilities are supported by the compiler as well) and polls the system for events that must be processed; this technique fails when no virtual machine instructions are executing, such as during garbage collections or when blocked on file I/O. On some platforms such as X, this probably could be done using the platform event queue manipulation routines. Instead, the Unicon window interface maintains its own keystroke and mouse event queue from which the Unicon functions obtain their events. This additional queue makes the implementation more portable. Various window systems probably do not support queue manipulation 
to the extent or in the same way that X does. It also provides the programmer with a higher level event processing abstraction which has proven useful. Window resizing is partly handled by the interface. The old contents of the window are retained in their original positions, but the program is informed of the resize so it can handle the resize in a more reasonable manner. As has already been noted, exposure events are hidden entirely via the use of a backing pixmap with identical contents for each window. The pixmap starts out the same size as the window. It is resized whenever the window grows beyond one of its dimensions. It could be reduced whenever the window shrinks, but then part of the window contents would be lost whenever the user accidentally made the window smaller than intended. The platform-dependent modules also contain tables of type stringint. These tables are supported by routines that map strings for various attributes and values to native window system integer constants. Binary search is employed. This approach is a crude but effective way to provide symbolic access "built-in" to the language without requiring include files. Additional tables mapping strings to integers are found in the platform independent source files.

32"

Resource Management One of the most important tasks performed by platform-specific graphics functions is the management of resources, both the on-screen resources (windows) and the drawing context elements used by the window system in performing output. Memory Management and r*rsc.ri Files Memory management for internal window structures is independent of Unicon's standard memory management system. Xlib memory is allocated using malloc(2). Most internal Unicon window structures could be allocated in Unicon's block region, but since they are acyclic and cannot contain any pointers to Unicon values, this would serve little purpose (Actually, it is probably the right thing to do, and will probably happen some day, but it's just not in the cards right now unless you feel like messing with the garbage collector.). In addition when an incoming event is being processed it has to be matched up with the appropriate window state structure, so some of the window structures must be easily reached, not lost in the block region. The window interface structures are reference counted and freed when the reference count reaches 0. Color Management Managing colors under X Windows is painful. In particular, if the same color is allocated twice the color table entry is shared (which is good) and that entry may only be freed once (which is bad). For this reason, every color allocated by Unicon is remembered and duplicate requests are identified and freed only once. In the general case it is impossible to detect when a particular color is no longer being displayed, and so colors are only freed on window closure or when a window is cleared. Font Management Unicon supports a portable font name syntax. Since the available fonts on systems vary widely, "interesting" code has been written to support these portable names on various X servers. Each window system may need to include heuristics to pick an appropriate font in the font allocation routine in the window system's r*.ri file.

26.5 External Image Files and Formats


Reading and writing window contents to external files is accomplished by the routines loadimage() and dumpimage(), implemented in each platform's window system specific file, such as rxwin.ri. These routines take a window binding and a string filename and perform the I/O transfer. Presently, the file format is assumed to be indicated by the filename extension; this is likely to change. Ideally Unicon should tolerate different file formats more flexibly, inferring input file formats by reading the file header where possible, and running external conversion programs where appropriate. GIF and JPEG files are self-identifying, so they are always recognized independent of name. They are checked in system-independent code before platform-specific image reading code is invoked.

26.6 Implementation of 3D Facilities


In order to implement the 3D facilities, the Unicon runtime system was modified to support 2D and 3D windows. The Unicon runtime system is written in a custom superset

32#

dialect of C called RTL [Walker94]. The 3D facilities use the existing 2D facilities code for window creation and destruction, as well as handling keyboard and mouse input. 3D Facilities Requirements OpenGL 1.2 or later must be present on the system in order for Unicon's 3D graphics facilities to work. A check for this is performed in wopengl() which can be found in the file ropengl.ri. The requirement of OpenGL 1.2 is based on the fact that the function glTexBind(), which makes the implementation of textures more efficient, is only available in OpenGL 1.2 and later. Also needed for the Unicon 3D graphics facilities is a system that supports a true color visual with a depth buffer of 16 and a double buffer. The requirement of a depth buffer is a necessity to implement lighting. For lighting to work properly in OpenGL, a depth test must be performed, hence the need of a depth buffer. A double buffer is needed to implement the list structure that is used to redraw a window. More information can be found on redrawing of windows in section 7.3. Files Several existing files contain extensions to support the Unicon 3D graphics facilities under #ifdef Graphics3D, including data.r (new runtime error codes), fwindow.r (new 3D functions), rmemmgt.r (3D window display lists), rxwin.ri and rmswin.ri (modified wopen() and wmap() to support 3d mode), rwindow.r (new 3D attributes), and graphics.h (new 3D fields in canvas and context structures). Also a new file, ropengl.ri was added that contains the C helper functions for functions in fwindow.r, rxwin.ri, and rwindow.r. Redrawing Windows In the 2D graphics facilities, events that require the redrawing of a window are handled by using a pixmap. Instead of using a pixmap, for the Unicon 3D graphics facilities, a Unicon list of lists is created for each window opened in gl mode. This list of lists keeps track of all objects in a 3D graphics scene. This list is called funclist and is found in the wstate structure of a "gl" window. The 
individual lists contain the function name and the parameters of that function. Also placed on the list are attributes that affect the scene. These include dim, linewidth, texcoord, texture, texmode, and fg. When a window receives an event that requires redrawing, the window is cleared, all attributes are reset to the defaults, and the Unicon list of lists is traversed to redraw every object in the scene. There are some functions and attributes that are not placed in the list. Instead they must either modify the list or call the list to redraw the scene. The function EraseArea() not only clears the screen but also clears the contents of the list. The attributes light0-light7, eye, eyeup, eyedir, and eyepos use the list to redraw the window with the new attributes. So if the position of a light changes, the new lighting calculations are performed and the scene is redrawn. Besides these functions and attributes, every function or attribute available in the 3D graphics facilities is placed on this list. It is important to note that functions and attributes that have no effect in the 3D graphics facilities are not placed in this list. Textures In OpenGL, textures can be one, two, or three-dimensional and are represented as multidimensional arrays. In the Unicon 3D graphics facilities all textures are two-dimensional, and

33&

represented as three-dimensional arrays. This array keeps track of the position and red, green, and blue components of each pixel in the texture image. When a texture image is specified in a Unicon program, the texture is converted from the Unicon internal representation of the image to a three-dimensional array. For most cases, this does not take a long time, but the larger a texture image gets, the slower the application will run. Several measures have been taken in order to increase the efficiency of converting the texture image into an array. Since lighting and texturing are fairly expensive operations, especially if several lights are activated, these features are temporarily deactivated. Despite these efforts, converting a "g" window to a texture is still fairly expensive. Possible future work includes ways to speed up this process. Instead of adding a texture to the list of lists as described in section 7.3, OpenGL's internal texture resources are used. OpenGL assigns to each texture a name. The names assigned to each texture in a Unicon scene are stored in texname[], which can be found in a gl window's context. To ensure that a texture name is not reused, a call to glGenTextures() is made which produces unused texture names. When a texture is applied to the scene, only the index 
of the array texname[] is stored in the list. When the list is traversed, a call to glBindTexture() is made which binds the texture to the subsequent objects in the scene. One problem of using this representation of textures is that it places an upper bound on the number of textures used. This is because glGenTextures() requires the number of texture names to generate. Also, by using only glBindTexture(), the implementation never deletes a texture from the texture resources, possibly using up all texture resources. Future work might look into when to delete a texture and ways to check when the texture resources have been used up. Texture Coordinates The primitives as mentioned in previous sections are cubes, tori, cylinders, disks, partial disks, points, lines, polygons, spheres, line segments, and filled polygons. Some of these primitives are drawn using different aspects of the OpenGL library, with some using the glu library. Points, lines, line segments, polygons, and filled polygons are drawn using glBegin(), glEnd(), and vertices in between the function calls. Cylinders, disks, partial disks, and spheres are implemented using the glu library. They are considered to be gluQuadrics objects. Finally cubes and tori are a composition of several polygons. The texturing method used is influenced by how the primitive is composed. For the primitives built using the OpenGL library, default texture coordinates are obtained much differently than for those primitives built using the glu library. For those primitives built using glBegin() and glEnd(), glTexGen() is used to determine the default parameters when "texcoord=auto". In order to use this feature we must enable GEN_S and GEN_T with glEnable(). 
This generates texture coordinates for 2D textures. The texture coordinates for a torus are obtained in the same way. Primitives built using the glu library have texture coordinates associated with them. These texture coordinates can be obtained by calling gluQuadricTexture(). The glu texture coordinates are used instead of the OpenGL coordinates because the glu texture coordinates look nicer. In order to use these texture coordinates instead of the ones specified by OpenGL, it is necessary to disable GEN_S and GEN_T. After the object has been drawn, GEN_S and GEN_T are turned back on. A cube uses default texture coordinates that map the texture onto each of the faces of a cube. In order to use these default coordinates, it is necessary to disable GEN_S and GEN_T, similar to glu objects.

331

$3.4 9raphics %acilities !orting *eference


This section documents the window-system specific functions and macros that generally must be implemented in order to port Unicon's graphics facilities to a new window system. The list is compiled primarily by studying fwindow.r, rwindow.r, and the existing platforms. A note on types: w is a window binding pointer (wbp), the top level Unicon "window" value. i is an integer, s is a string. wsp is the window state (a.k.a. canvas) pointer, and wcp is the window context pointer. A bool return value returns one of the C macro values Succeeded or Failed, instead of the usual C booleans 1 and 0. ANGLE(a) Convert from radians into window system units. For example, under X these are 1/64 of a degree integer values, while under PresentationManager it converts to units of 1/65536 of a degree in a fixed-point format. ARCHEIGHT(arc) The height component of an XArc. ARCWIDTH(arc) The width component of an XArc. ASCENT(w) Returns the number of pixels above the baseline for the current font. Note that when Unicon writes text, the (x,y) coordinate gives the left edge of the character at its baseline; some window systems may need to translate our coordinates. int blimage(w, x, y, width, height, ch, s, len) Draws a bi-level (i.e. monochrome, 1-bit-per-pixel) image, used in DrawImage() which draws bitmap data stored in Unicon strings. wcp clone_context(w) Allocate a new context, cloning attributes from w's context. COLTOX(w, i) Return integer conversion from a 1-based text column to a pixel coordinate. copyArea(w1, w2, x, y, width, height, x2, y2) Copies a rectangular block of pixels from w1 to w2. DESCENT(w) Returns the number of pixels below the baseline for the current font. DISPLAYHEIGHT(w) Return w's display (screen) height in pixels. DISPLAYWIDTH(w)

332

Return w's display width in pixels. bool do_config(w, i) Performs move/resize operations after one or more attributes have been evaluated. Config is a word with two flags: the one bit indicates a move, the two bit indicates a resize. The desired sizes are in the window state pointer, e.g. w->window->width. drawarcs(w, thearcs, i) Draw i arcs on w, given in an array of XArc structures. Define an appropriate XArc structure for your window system; it must include fields x, y and width and height fields accessible through macros ARCWIDTH() and ARCHEIGHT(). Also, a starting angle angle1 and arc extent angle2, assigned through macros ANGLE(), EXTENT(), and FULLARC. This is currently a mess. Imitation of the X or PresentationManager code is in order. drawlines(w, points, i) Draw i-1 connected lines, connecting the dots given in points. drawpoints(w, points, i) Draw i points. drawsegments(w, segs, i) Draw i disconnected line segments; define an XSegment structure appropriate to your window system, consisting of fields x1, y1, x2, y2. This type definition requirement should be cleaned up someday. drawstring(w, x, y, s, s_len) Draw string s at coordinate (x,y) on w. Note that y designates a baseline, not an upper-left corner, of the string. drawrectangles(w, rectangles, i) Draw i rectangles. Define an XRectangle structure appropriate to your window system.

int dumpimage(w, s, x, y, width, height) Write an image of a rectangular area in w to file s. Returns Succeeded, Failed, or NoCvt if the platform doesn't support the requested format. Note that this is the "platform-dependent image writing function"; requests to write GIF or JPEG are handled outside of this function. eraseArea(w, x, y, width, height) Erase a rectangular area, that is, set it to the current background color. Compare with fillrectangles(). EXTENT(a) Convert from radians into window system units, e.g. under PresentationManager it converts to units of 1/65536 of a circle and does some weird type conversion.

333

fillarcs(w, arcs, i) Fill wedge-like arc sections (pie pieces). See drawarcs(). fillrectangles(w, rectangles, i) Fill i rectangles. See drawrectangles(). fillpolygon(w, points, i) Fill a polygon defined by i points. Connect first and last points if they are not the same. FHEIGHT(w) Returns the pixel height of the current font, hopefully ASCENT + DESCENT. free_binding(w) Free binding associated with w. This gets rid of a binding that refers to w, without necessarily closing the window itself (other bindings may point to that window). free_context(wc) Free window context wc. free_mutable(w, i) Free mutable color index i. free_window(ws) Free window canvas ws. freecolor(w, s) Free a color allocated on w's display. FS_SOLID Define this to be the window system's solid fill style symbol. FS_STIPPLE Define this to be the window system's stippled fill style symbol. FULLARC Window-system value for a complete (360 degree) circle or arc. FWIDTH(w) Returns the pixel width of the widest character in the current font. wsp getactivewindow() Return a window state pointer to an active window, blocking until a window is active. Probably will be generalized to include a non-blocking variant. Returns NULL if no windows are opened. getbg(w, s)

334

Returns (writes into s) the current background color. getcanvas(w, s) Returns (writes into s) the current canvas state. getdefault(w, s_prog, s_opt, s) Get any window system defaults for a program named s_prog resource named s_opt, write result in s. getdisplay(w, s) Write a string to s with the current display name. getdrawop(w, s) Return current drawing operation, one of various logical combinations of source and destination bits. getfg(w, s) Returns (writes into s) the current foreground color. getfntnam(w, s) Returns (writes into s) the current font. This interface may get changed since a portable font naming mechanism is to be installed. Name is presently always prefixed by "font=" (pretty stupid, huh); must be an artifact of merging window system ports; will be changed. geticonic(w, s) Return current window iconic state in s, could be "iconify" or whatever. Obsolete (subsumed by canvas attribute, getcanvas()). geticonpos(w, s) Return icon's position to s, an encoded "x,y" format string. int getimstr(w, x, y, width, height, paltbl, data) Gets an image as a string. Used in GIF code. getlinestyle(w, s) Return current line style, one of solid, dashed, or striped. get_mutable_name(w, i) Returns the string color name currently associated with a mutable color. getpattern(w, s) Return current fill pattern in s. getpixel(w, x, y, long *rv) Assign RGB value for pixel (x,y) into *rv. getpixel_init(w, struct imgmem *imem)

335

Prepare to fetch pixel values from window, obtaining contents from server if necessary. This function does all the real work used by subsequent calls to getpixel(). getpointername(w, s) Write mouse pointer appearance, by name, to s. getpos(w) Update the window state's posx and posy fields with the current window position. getvisual(w, s) Write a string to s that explains what type of display w is on, e.g. "visual=x,y,z", where x is a class, y is the bits per pixel, and z is number of colormap entries available. This X-specific anachronism is likely to go away. HideCursor(wsp ws) Hide the text cursor on window state ws. ICONFILENAME(w) Produce char * for window's icon image file name if there is one. ICONLABEL(w) Produce char * for icon's title if there is one. isetbg(w, i) Set background color to mutable color table entry i. Mutable colors are not available on all display types. isetfg(w, i) Set foreground color to mutable color table entry i. Mutable colors are not available on all display types. ISICONIC(w) Return 1 if the window is presently minimized/iconic, 0 otherwise. ISFULLSCREEN(w) Return 1 if the window is presently maximized/fullscreen, 0 otherwise. ISNORMALWINDOW(w) Return 1 if the window is neither minimized nor maximized, 0 otherwise. LEADING(w) Return current integer leading, the number of pixels from line to line. LINEWIDTH(w) Return current integer line width used during drawing. lowerWindow(w)

336

Lower the window to the bottom of the stack. mutable_color(w, dptr dp, i, C_integer *result) Allocate a mutable color from color spec given by dp and i, placing result (a small negative integer) in *result. nativecolor(w, s, r, g, b) Interpret a platform-specific color name s (define appropriately for your window system). Under X, we can do this only if there is a window. pollevent() Poll for available events on all opened displays. This is where the interpreter calls the window system interface. Return a -1 on an error, otherwise return count of how long before it should be polled again (400). query_pointer(w, XPoint *xp) Produce mouse pointer location relative to w. query_rootpointer(XPoint *xp) Produce mouse pointer location relative to root window on default screen. raiseWindow(w) Raise the window to the top of the stack. bool readimage(w, s, x, y, int *status) Read image from file s into w at (x,y). Status is 0 if everything was kosher, 1 if some colors weren't available but the image was read OK; if a major problem occurs it returns Failed. See loadimage() for the real action. rebind(w, w2) Assign w's context to that of w2. RECHEIGHT(rec) The height component of an XRectangle. Gets "fixed up" (converted) into a Y2 value if necessary, in window system specific code. RECWIDTH(rec) The width component of an XRectangle. Gets "fixed up" (converted) into an X2 value if necessary, in window system specific code. RECX(rec) The x component of an XRectangle. RECY(rec) The y component of an XRectangle. ROWTOY(w, i)

337

Return integer conversion from a 1-based text row to a pixel coordinate. SCREENDEPTH(w) Returns the number of bits per pixel. int setbg(w, s) Set the context background color to s. Returns Succeeded or Failed. setcanvas(w, s) Set canvas state to s, make it "iconic", "hidden" or whatever. A canvas value extension such as fullscreen would go here. Changes in canvas state are tantamount to destroying the old window, creating a new window (with appropriate size and style) and adjusting the pixmap size correspondingly. Much of the associated logic, however, might be located in the event handlers for related window system events. setclip(w) Set (enable) clipping on w from its context. setcursor(w, i) Turn text cursor on or off. Text cursor is off (invisible) by default. setdisplay(w, s) Set the display to use for this window; fails if the window is already open somewhere. setdrawop(w, s) Set drawing operation to one of various logical combinations of source and destination bits. int setfg(w, s) Set the context foreground color to s. Returns Succeeded or Failed. setfillstyle(w, s) Set fill style to solid, masked, or textured. bool setfont(w, char **s) Set the context font to s. This function first attempts to use the portable font naming mechanism; it resorts to the system font mechanism if the name is not in portable syntax. setgamma(w, gamma) Set the context's gamma correction factor. setgeometry(w, s) Set the window's size and/or position. setheight(w, i) Set window height to i, whether or not window is open yet.

338

seticonicstate(w, s) Set window iconic state to s, it could be "iconify" or whatever. Obsolete; setcanvas() is more important. seticonimage(w, dptr d) Set window icon to d. Could be string filename or existing pixmap (i.e. another window's contents). Pixmap assignment no longer possible, so one could simplify this to just take a string parameter. seticonlabel(w, s) Set icon's string title to s. seticonpos(w, s) Move icon's position to s, an encoded "x,y" format string. setimage(w, s) Set an initial image for the window from file s. Only valid during open(). setleading(w, i) Set line spacing to i pixels from line to line. This includes font height and external leading, so i < fontheight means lines draw partly over preceding lines, i > fontheight means extra spacing. setlinestyle(w, s) Set line style to solid, dashed, or striped. setlinewidth(w, i) Set line width to i. set_mutable(w, i, s) Set mutable color index i to color s. SetPattern(w, s, s_len) Set fill pattern to bits given in s. Fill pattern is not used unless fillstyle attribute is changed to "patterned" or "opaquepatterned". SetPatternBits(w, width, bits, nbits) Set fill pattern to bits given in the array of integers named bits. Fill pattern is not used unless fillstyle attribute is changed to "patterned" or "opaquepatterned". setpointer(w, s) Set mouse pointer appearance to shape named s. setpos(w, s) Move window to s, a string encoded "(x,y)" thing.

339

setwidth(w, i) Set window width to i, whether or not window is open yet. setwindowlabel(w, s) Set window's string title to s. ShowCursor(wsp ws) Show the text cursor on window state ws. int strimage(w, x, y, width, height, e, s, len) Draws a character-per-pixel image, used in DrawImage(). See blimage(). SysColor Define this type to be the window system's RGB color structure. TEXTWIDTH(w, s, s_len) Returns the integer text width of s using w's current font. toggle_fgbg(w) Swap the foreground and background on w. unsetclip(w) Disable clipping on w from its context. UpdateCursorPos(wsp ws, wcp wc) Move the text cursor on window state ws and context wc. walert(w, i) Sounds an alert (beep). i is a volume; it can range between -100 and 100; 0 is normal. warpPointer(w, x, y) Warp the mouse location to (x,y). wclose(w) Closes window w. If there are other bindings that refer to the window, they are converted into pixmaps, i.e. the window disappears but the canvas is still there and can be written on and copied from. wflush(w) Flush output to window w, a no-op on some systems. wgetq(w, dptr result) Get an event from w's pending queue, put results in descriptor *result. Returns -1 for an error, 1 for success (should fix this). WINDOWLABEL(w)

340

Produce char * for window's title if there is one.

FILE *wopen(s, struct b_list *lp, dptr attrs, i, int *err_index, is_3d) Open window named s, with various attributes. This ought to be merged from various window system dependent files, but presently each one defines its own. Copy and modify from rxwin.ri or rmswin.ri. The return value is really a wbp, cast to a FILE *. wputc(c, w) Draw character c on window w, interpret newlines, carriage returns, tabs, deletes, backspaces, and the bell. wsync(w) Synchronize server and client (a no-op on some systems). xdis(w, s, s_len) Draw string s on window w, low-level. XTOCOL(w, i) Return integer conversion from a 0-based pixel coordinate to text column. YTOROW(w, i) Return integer conversion from a 0-based pixel coordinate to text row.

$/.7 The B mplementation


The reference implementation of Unicon's graphics facilities is written in terms of Xlib, the lower-level X Window C interface [Nye88]. It does not use the X resource manager. The end result of these two facts is that the implementation is relatively visible: the semantics are expressed fairly directly in the source code. Although it is necessary to understand the semantics of the underlying X routines, hidden behavior has been minimized. Unicon does not rely on the X Toolkit Intrinsics (Xt) or any higher level widget set such as Motif. This guarantees that Unicon will compile and run on any X11 platform. Unicon programs implement their own look and feel, which may or may not be consistent with the other applications on a given X workstation. The Unicon Program Library includes routines that implement user interface components with an appearance that is similar to Motif. The X implementation employs the XPM X pixmap library if it is available; XPM is a proposed extension to Xlib for storing color images in external files [LeHors91]. XPM provides color facilities analogous to the built-in X black-and-white bitmap routines. In addition to the image formats native to each platform, Unicon also supports GIF and JPEG as portable image file formats.

$/.1< The M, ?indows mplementation


The Microsoft Windows implementation of Unicon is written using Win32, the lower-level 32-bit Windows API. It does not use the Microsoft Foundation Classes. This makes it

341

easier to build with different C compilers, and easier to port to different Windows implementations, such as Windows CE. Installing, Configuring, and Compiling the Source Code Building Unicon for Windows Version 11.0 requires Mingw32 GCC 2.95.2. Newer versions of Windows GCC might be made to work, but thus far have produced nonworking executables. We hope to add Cygwin GCC support in the future. The sources may also build with modest revision under MS Visual C++ 2.0 or newer. I have built earlier versions with MSVC versions 2, 5, and 6. I encourage you to try building using other compilers, and send me your configuration files. You will need a robust Win32 platform to compile these sources; the build scripts and "make" process tend to fail on older versions of Windows. 1. Unpack the sources. Unpack uni.zip in such a way that it preserves its subdirectory structure. Unzip.exe is recommended rather than WinZip. See Icon Project Document 243 [ipd243] for a picture of the directory hierarchy. In particular, there should be a BIN directory along with the SRC directory under the unicon/ directory. 2. Configure the sources. Run "make W-Configure-GCC" (or "make W-Configure" under MSVC) to configure your sources to build wiconx and wicont, the Unicon virtual machine interpreter, and the Unicon bytecode compiler, with graphics facilities enabled. 3. Compile to make executables. 
Run "make Unicon" to build the currently-configured binary set. It is worth discussing why I provide makefiles instead of a project file for use in the Visual C++ IDE. The reason is that the source files for the Unicon virtual machine interpreter (generically called iconx, wiconx.exe in this case) are written in an extended dialect of ANSI C called RTL [ipd261]. Files in this language have the extension .r instead of .c and .ri instead of .h. During compilation, a program called rtt (the run time translator) translates .r* files into .c files. If someone wants to show me how to insert this step into the Visual C++ IDE build process, I would be happy to use their IDE. You can write project files for the other C programs that make up the Unicon system, but most modifications to the language are changes to the interpreter. Notes on the MS Windows internal functions The functions documented here are those most likely to be involved in projects to add features to Windows Unicon. handle_child(w, UINT msg, WPARAM wp, LPARAM lp) This procedure handles messages from child window controls such as buttons. In many cases, this enqueues an event on the Unicon window. int playmedia(w, char *s) This crude function will call one of several multimedia functions depending on whether s is the name of a multimedia file (.wav, .mid, .rmi are supported) or an MCI command string.

342

int getselection(w, char *s) Return the current contents of the clipboard text. The design of this and setselection() needs to be broadened a bit to support images. int setselection(w, char *s) Set the clipboard text to s.

343

Chapter 28: Networking, Messaging and the POSIX Interface


Unicon's system interface is greatly enriched compared with Icon, primarily in that it treats Internet connections and Internet-based applications as ubiquitous, and extends the file type with appropriately high-level capabilities. Fundamental TCP and UDP connections are a breeze using the networking facilities, and common application-level protocols are supported via the messaging facilities (see also the X11 graphics facilities and the SQL/ODBC database facilities for examples where application-level networking is provided in Unicon). Portions of this chapter related to the messaging facilities were contributed by their author, Steve Lumos.

28.1 Networking Facilities


...

28.2 Messaging Facilities


The Transfer Protocol Library All of the message facilities are handled by the transfer protocol library (libtp). This library provides an abstraction of the many different protocols (HTTP, SMTP, etc.) into a clear and consistent API. Ease of adding support for new protocols and porting the entire library to new operating system interfaces were primary design goals. These goals are both accomplished by using the AT&T Labs discipline and method (DM) architecture described below. Libtp Architecture The key feature of the DM architecture is that it makes explicit two interfaces in the library: disciplines which hold system resources and define routines to acquire and manipulate them, and methods which define the higher-level algorithms used to access these resources. This model fits the problem of Internet transfer protocols nicely; the discipline abstracts the operating system interface to the network, and there is a method for each protocol that defines communication with a server only in terms of the discipline. This architecture makes porting easy because you need only create a discipline for the new system, which means writing 9 functions. The only currently-existing discipline handling both the Berkeley Socket and WINSOCK APIs is only 400 lines long. Once a discipline exists, the new system immediately gains all of the supported protocols. The Discipline The discipline is a C structure whose members are pointers to functions:
/*
 * The libtp discipline: a table of function pointers through which the
 * protocol methods reach every system resource they need.
 *
 * By convention every discipline function takes a pointer to the current
 * discipline as its last argument.
 *
 * The forward typedef must use the same struct tag as the definition
 * below (_tpdisc_s); otherwise Tpdisc_t would name a different struct
 * that is never completed.
 */
typedef struct _tpdisc_s Tpdisc_t;   /* discipline */

typedef int       (*Tpconnect_f)(char* host, u_short port, Tpdisc_t* disc);
typedef int       (*Tpclose_f)(Tpdisc_t* disc);
typedef ssize_t   (*Tpread_f)(void* buf, size_t n, Tpdisc_t* disc);
typedef ssize_t   (*Tpreadln_f)(void* buf, size_t n, Tpdisc_t* disc);
typedef ssize_t   (*Tpwrite_f)(void* buf, size_t n, Tpdisc_t* disc);
typedef void*     (*Tpmem_f)(size_t n, Tpdisc_t* disc);
typedef int       (*Tpfree_f)(void* obj, Tpdisc_t* disc);
typedef int       (*Tpexcept_f)(int type, void* obj, Tpdisc_t* disc);
typedef Tpdisc_t* (*Tpnewdisc_f)(Tpdisc_t* disc);

struct _tpdisc_s
{
  Tpconnect_f connectf;   /* establish a connection */
  Tpclose_f   closef;     /* close the connection */
  Tpread_f    readf;      /* read from the connection */
  Tpreadln_f  readlnf;    /* read a line from the connection */
  Tpwrite_f   writef;     /* write to the connection */
  Tpmem_f     memf;       /* allocate some memory */
  Tpfree_f    freef;      /* free memory */
  Tpexcept_f  exceptf;    /* handle exception */
  Tpnewdisc_f newdiscf;   /* deep copy a discipline */

  int type;               /* (not used currently) */
};

These functions define a complete API for acquiring and manipulating all of the system resources needed by all of the methods and (it is hoped) any conceivable method. By convention, every discipline function takes a pointer to the current discipline as its last argument. (Every method function takes a library handle which contains a pointer to the current discipline, so the discipline functions are always available when needed.) The Tpdisc_t is an abstract discipline. In practice, a new discipline will extend Tpdisc_t by at minimum adding some system dependent data such as a Unix file descriptor or Windows SOCKET*. Here is the "Unix" discipline (it would be better called the socket discipline since it works for the Berkeley Socket API and WINSOCK on multiple systems):
/*
 * The "Unix" (socket) discipline: the abstract discipline extended with
 * the one piece of system-dependent state it needs, a file descriptor.
 *
 * The abstract Tpdisc_t is the first member, so a Tpdisc_t* handed to a
 * discipline function can be cast to a Tpunixdisc_t* to reach fd.
 */
typedef struct _tpunixdisc_s Tpunixdisc_t;

struct _tpunixdisc_s
{
  Tpdisc_t tpdisc;   /* abstract discipline; must remain the first member */
  int fd;            /* descriptor of the open connection */
};

Exception Handling The DM architecture defines a very useful convention for exception handling. Exceptions are passed as integers to the exceptf function along with some exception-specific data. The function can do arbitrary processing and then return {-1, 0, 1}, which instructs the library to retry the operation (1), return an error to the caller (-1), or take some default action (0). Libtp uses constants TP_TRYAGAIN, TP_RETURNERROR, and TP_DEFAULT. Although not as powerful as languages with true exceptions, the DM exception handling definitely serves to make the code more readable. In the Unix discipline, exceptf is used to aggregate all of the many, sometimes transient errors that can occur in network programming. For example, the Unix discipline's readf function is:
/*
 * unixread - readf for the Unix (socket) discipline.
 *
 * Reads until n bytes have been stored in buf, asking the discipline's
 * exceptf what to do whenever read() returns zero or a negative value.
 *
 *   buf     destination buffer, at least n bytes
 *   n       number of bytes requested
 *   tpdisc  current discipline; cast to Tpunixdisc_t to reach fd
 *
 * Returns the number of bytes actually read (less than n if exceptf
 * chose the default action, e.g. at end-of-file), or -1 if exceptf asked
 * for an error return.
 */
ssize_t unixread(void* buf, size_t n, Tpdisc_t* tpdisc)
{
  Tpunixdisc_t* disc = (Tpunixdisc_t*)tpdisc;
  size_t nleft;
  ssize_t nread;
  char* ptr = buf;

  nleft = n;
  while (nleft > 0) {
    if ((nread = read(disc->fd, ptr, nleft)) <= 0) {
      /* pass &nread so the handler can tell EOF (0) from an error (<0) */
      int action = tpdisc->exceptf(TP_EREAD, &nread, tpdisc);
      if (action > 0) {
        /* retry the read; nothing was consumed on this pass */
        nread = 0;
        continue;
      }
      else if (action == 0) {
        /* default action: stop and report what has been read so far */
        break;
      }
      else {
        return (-1);
      }
    }

    nleft -= nread;
    ptr += nread;
  }

  /* bytes delivered = bytes requested minus bytes still outstanding */
  return (n - nleft);
}

The Unix read() system call can return a positive number, indicating the number of bytes read, a negative number, indicating an error, or zero, if end-of-file is reached (or a network connection is closed by the remote host). We consider the latter two cases exceptional, and ask exceptf what we should do. An exceptf function is normally a large switch with one case for each exception. For TP_EREAD, it says:
/* Excerpt from the exceptf switch: read() returned zero or a negative value. */
case TP_EREAD:
  if (errno == EINTR) {
    /* interrupted call: ask the caller to retry */
    return TP_TRYAGAIN;
  }
  else {
    /* obj points at the caller's read() result */
    ssize_t nread = (*(ssize_t*)obj);
    if (nread == 0) { /* EOF */
      return TP_DEFAULT;
    }
    else {
      return TP_RETURNERROR;
    }
  }

This may not seem very revolutionary; after all the code that calls exceptf and branches on its result is just as long as the exception handler itself. We aren't even gaining much code reuse over the conventional method, which wraps system calls in another function with names like Read(). The real win here lies in the ability of the caller to replace or extend exceptf at runtime. You may have noticed that there is no code above to output an error message; unixread() simply returns -1 on errors. In fact, the standard and expected way to output errors is to override exceptf. The wtrace example shown [XXX: at the end somewhere?] uses the following:

346

Tpexcept_f tpexcept;   /* the discipline's original exceptf, saved before it is replaced */
Tpdisc_t* disc;        /* private copy of the discipline; a pointer, as returned by tp_newdisc() */

/*
 * exception - replacement exceptf that adds error reporting.
 *
 * Delegates to the saved handler first. If that handler decides the
 * error must be returned, prints a message prefixed with url and exits
 * with status 1; any other decision is passed back unchanged.
 *
 *   e     exception code
 *   obj   exception-specific data, forwarded untouched
 *   disc  current discipline (shadows the file-scope disc)
 */
int exception(int e, void* obj, Tpdisc_t* disc)
{
  int rc = tpexcept(e, obj, disc);
  if (rc == TP_RETURNERROR) {
    if (errno != 0) {
      /* a system call failed: let perror describe it */
      perror(url);
    }
    else {
      /* no errno: report by exception code */
      switch (e) {
        case TP_HOST:
          fputs(url, stderr);
          fputs(": Unknown host\n", stderr);
          break;
        default:
          fputs(url, stderr);
          fputs(": Error connecting\n", stderr);
      }
    }
    exit(1);
  }
  else {
    return rc;
  }
}

Then instead of the usual:


/* <uri> and <method> are placeholders; the stock TpdUnix discipline is passed as-is */
tp = tp_new(<uri>, <method>, TpdUnix);

wtrace copies TpdUnix, saves and replaces the default exception handler, and then uses the copied discipline:
disc = tp_newdisc(TpdUnix);           /* work on a copy of the TpdUnix discipline */
tpexcept = disc->exceptf;             /* remember the original handler */
disc->exceptf = exception;            /* install the replacement handler */
tp = tp_new(<uri>, <method>, disc);   /* <uri>, <method> are placeholders */

In the same way, wtrace also overrides all of the read and write functions to provide a trace log of HTTP communications.

347

Part IV: Appendixes

348

349

Appendix A: Data Structures


This appendix summarizes, for reference purposes, all descriptor and block layouts in Icon.

A.1 Descriptors
Descriptors consist of two words (normally C ints): a d-word and a v-word. The d-word contains flags in its most significant bits and small integers in its least significant bits. The v-word contains a value or a pointer. The flags are: n (nonqualifier), p (v-word contains a pointer), v (variable), and t (trapped variable).

A.1.1 Values There are three significantly different descriptor layouts for values. A qualifier for a string is distinguished from other descriptors by the lack of an n flag in its d-word, which contains only the length of the string. For example, a qualifier for the string "hello" is

The null value and integers have type codes in their d-words and are self-contained. Examples are:

For all other data types, a descriptor contains a type code in its d-word and a pointer to a block of data in its v-word. A record is typical:

350

A.1.2 Variables There are two formats for variable descriptors. The v-word of an ordinary variable points to the descriptor for the corresponding value:

If the variable points to a descriptor in a block, the offset is the number of words from the top of the block to the value descriptor. If the variable points to a descriptor that corresponds to an identifier, the offset is zero. The descriptor for a trapped variable contains a type code for the kind of trapped variable in its d-word and a pointer to the block for the trapped variable in its v-word. The trapped variable for &subject is typical:

A.2 Blocks
With the exception of the null value, integers, and strings, the data for Icon values is kept in blocks. The first word of every block is a title that contains the type code for the corresponding data type. For blocks that vary in size for a particular type, the next word is the size of the block in bytes. The remaining words depend on the block type, except that all non-descriptor data precedes all descriptor data. With the exception of the long integer block, the diagrams that follow correspond to blocks for computers with 32-bit words. A.2.1 Long Integers On computers with 16-bit words, integers that are too large to fit in the d-word of a descriptor are stored in blocks. For example, the block for the integer 80,000 is

A.2.2 Real Numbers Real numbers are represented by C doubles. For example, on computers with 32-bit words, the real number 1.0 is represented by

A.2.3 Csets The block for a cset contains the usual type code, followed by a word that contains the number of characters in the cset. Words totaling 256 bits follow, with a one in a bit position indicating that the corresponding character is in the cset, and a zero indicating that it is not. For example, &ascii is

351

A.2.4 Lists A list consists of a list-header block that points to a doubly-linked list of list-element blocks, in which the list elements are stored in circular queues. See Chapter 6 for details. An example is the list
[1,2,3]

which is represented as

Here there is only one list-element block:

352

A.2.5 Sets A set consists of a set-header block that contains slots for linked lists of set-element blocks. See Sec. 7.1 for details. An example is given by
set([1, 2, 3, 4])

which is represented as

The set-element block for the member 3 is

353

A.2.6 Tables A table is similar to a set, except that a table-header block contains the default assigned value as well as slots for linked lists of table-element blocks. See Sec. 7.2 for details. An example is given by
t := table()
every t[1 | 4 | 7] := 1

The table t is represented as

The table-element block for the entry value 4 in the previous example is

A.2.7 Procedures The procedure blocks for procedures and functions are similar. For a procedure declaration such as

354

procedure calc(i,j)
   local k
   static base, index
end

the procedure block is

In a procedure block for a function, there is a value of -1 in place of the number of dynamic locals. For example, the procedure block for repl is

In the case of a function, such as write, which has a variable number of arguments, the number of arguments is given as -1:

355

A.2.8 Files The block for a file contains a pointer to the corresponding file, a word containing the file status, and a qualifier for the name of the file. For example, the block for &output is

The file status values are

0     closed
1     open for reading
2     open for writing
4     open to create
8     open to append
16    open as a pipe

A.2.9 Trapped Variables There are three kinds of trapped variables: keyword trapped variables, substring trapped variables, and table-element trapped variables. The corresponding blocks are tailored to the kind of trapped variable. The value of &trace illustrates a typical keyword trapped variable:

35

A substring trapped variable contains the offset and length of the substring, as well as a variable that points to the qualifier for the string. For example, if the value of s is "abcdef", the substring trapped-variable block for s[2:5] is

A table-element trapped-variable block contains a word for the hash number of the entry value, a pointer to the table, the entry value, and a descriptor reserved for the assigned value. For example, if t is a table, the table-element trapped-variable block for t[36] is

A.2.10 Co-Expressions A co-expression block consists of heading information, an array of words for saving the C state, an interpreter stack, and a C stack:

35!

The refresh block contains information derived from the procedure block for the procedure in which the co-expression was created. Consider, for example,
procedure labgen(s)
   local i, j, e
   i := 1
   j := 100
   e := create (s || (i to j) || ":")
   ...
end

For the call labgen("L"), the refresh block for e is

35"

35#

3 &

Appendix B: Virtual Machine Instructions


This appendix lists all the Icon virtual machine instructions. For instructions that correspond to source-language operators, only the corresponding operations are shown. Unless otherwise specified, references to the stack mean the interpreter stack.
arg n Push a variable descriptor pointing to argument n.
asgn expr1 := expr2

bang !expr
bscan Push the current values of &subject and &pos. Convert the descriptor prior to these two descriptors into a string. If the conversion cannot be performed, terminate execution with an error message. Otherwise, assign it to &subject and assign 1 to &pos. Then suspend. If resumed, restore the former values of &subject and &pos and fail.
cat expr1 || expr2

ccase Push a copy of the descriptor just below the current expression frame.
chfail n Change the failure ipc in the current expression frame marker to n.
coact Save the current state information in the current co-expression block, restore state information from the co-expression block being activated, perform a context switch, and continue execution.
cofail Save the current state information in the current co-expression block, restore state information from the co-expression block being activated, perform a context switch, and continue execution with co-expression failure signal set.
compl ~expr
coret Save the current state information in the current co-expression block, restore state information from the co-expression block being activated, perform a context switch, and continue execution with co-expression return signal set.
create Allocate a co-expression block and a refresh block. Copy the current procedure frame marker, argument values, and local identifier values into the refresh block. Place a procedure frame for the current procedure on the stack of the new co-expression block.
cset a Push a descriptor for the cset block at address a onto the stack.
diff expr1 -- expr2
div expr1 / expr2

dup Push a null descriptor onto the stack and then push a copy of the descriptor that was previously on top of the stack.
efail If there is a generator frame in the current expression frame, resume its generator. Otherwise remove the current expression frame. If the ipc in its marker is nonzero, set ipc to it. If the failure ipc is zero, repeat efail.
eqv expr1 === expr2

3 1

eret Save the descriptor on the top of the stack. Unwind the C stack. Remove the current expression frame from the stack and push the saved descriptor.
escan Dereference the top descriptor on the stack if necessary. Copy it to the place on the stack prior to the saved values of &subject and &pos (see bscan). Exchange the current values of &subject and &pos with the saved values on the stack. Then suspend. If resumed, restore the values of &subject and &pos from the stack and fail.
esusp Create a generator frame containing a copy of the portion of the stack that is needed if the generator is resumed.
field n Replace the record descriptor on the top of the stack by a descriptor for field n of that record.
global n Push a variable descriptor pointing to global identifier n.
goto n Set ipc to n.
init n Change init instruction to goto.
int n Push a descriptor for the integer n.
inter expr1 ** expr2

invoke n expr0(expr1, expr2, ..., exprn)
keywd n Push a descriptor for keyword n.
lconcat expr1 ||| expr2
lexeq expr1 == expr2
lexge expr1 >>= expr2
lexgt expr1 >> expr2
lexle expr1 <<= expr2
lexlt expr1 << expr2

lexne expr1 ~== expr2
limit Convert the descriptor on the top of the stack to an integer. If the conversion cannot be performed or if the result is negative, terminate execution with an error message. If it is zero, fail. Otherwise, create an expression frame with a zero failure ipc.
line n Set the current line number to n.
llist n [expr1, expr2, ..., exprn]
local n Push a variable descriptor pointing to local identifier n.
lsusp Decrement the current limitation counter, which is immediately prior to the current expression frame on the stack. If the limit counter is nonzero, create a generator frame containing a copy of the portion of the interpreter stack that is needed if the generator is resumed. If the limitation counter is zero, unwind the C stack and remove the current expression frame from the stack.
mark n Create an expression frame whose marker contains the failure ipc corresponding to the label n, the current efp, gfp, and ilevel.

3 2

mark0 Create an expression frame with a zero failure ipc.
minus expr1 - expr2
mod expr1 % expr2
mult expr1 * expr2

neg -expr
neqv expr1 ~=== expr2

nonnull \expr
null /expr
number +expr
numeq expr1 = expr2
numge expr1 >= expr2
numgt expr1 > expr2
numle expr1 <= expr2
numlt expr1 < expr2
numne expr1 ~= expr2
pfail If &trace is nonzero, decrement it and produce a trace message. Unwind the C stack and remove the current procedure frame from the stack. Then fail.
plus expr1 + expr2

pnull Push a null descriptor.
pop Pop the top descriptor.

power expr1 ^ expr2
pret Dereference the descriptor on the top of the stack, if necessary, and copy it to the place where the descriptor for the procedure is. If &trace is nonzero, decrement it and produce a trace message. Unwind the C stack and remove the current procedure frame from the stack.
psusp Copy the descriptor on the top of the stack to the place where the descriptor for the procedure is, dereferencing it if necessary. Produce a trace message and decrement &trace if it is nonzero. Create a generator frame containing a copy of the portion of the stack that is needed if the procedure call is resumed.
push1 Push a descriptor for the integer 1.
pushn1 Push a descriptor for the integer -1.
quit Exit from the interpreter.

random ?expr
rasgn expr1 <- expr2

3 3

real a Push a descriptor for the real number block at address a onto the stack.
refresh ^expr
rswap expr1 <-> expr2
sdup Push a copy of the descriptor on the top of the stack.
sect expr1[expr2:expr3]
size *expr

static n Push a variable descriptor pointing to static identifier n.
str n, a Push a descriptor for the string of length n at address a.
subsc expr1[expr2]
swap expr1 :=: expr2
tabmat =expr
toby expr1 to expr2 by expr3

unions expr1 ++ expr2
unmark Remove the current expression frame from the stack and unwind the C stack.
value .expr

3 4

Appendix C: Virtual Machine Code


The virtual machine code that is generated for various kinds of Icon expression is listed below. The form of code given is icode, the output of the Icon linker cast in a readable format. The code produced by the Icon translator, which serves as input to the Icon linker, is slightly different in some cases, since the linker performs some refinements.

C.1 Identifiers
As mentioned in Sec. 8.2.2, the four kinds of identifiers are distinguished by where their values are located. All are referred to by indices, which are zero based. The values of global identifiers are kept in an array that is loaded from the icode file and is at a fixed place in memory during program execution. By convention, the zeroth global identifier contains the procedure descriptor for main. The following instruction pushes a variable pointing to the value of main onto the interpreter stack:
main
    global 0

Static identifiers are essentially global identifiers that are only known on a per-procedure basis. Like global identifiers, the values of static identifiers are in an array that is at a fixed location. Static identifiers are numbered starting at zero and continuing through the program. For example, if count is static identifier 10, the following instruction pushes a variable descriptor pointing to that static identifier onto the stack:
count
    static 10

The space for the values of arguments and local identifiers is allocated on the stack when the procedure in which they occur is called. If x is argument zero and i is local zero for the current procedure, the following instructions push variable descriptors for them onto the stack:
x
    arg 0
i
    local 0

C.2 Literals
The virtual machine instruction generated for an integer literal pushes the integer onto the stack as an Icon descriptor. The value of the integer is the argument to the instruction:
100
    int 100

The instruction generated for a string literal is similar to that for an integer literal, except that the address of the string and its length are given as arguments. The string itself is in a region of data produced by the linker and is loaded as part of the icode file:
"hello"
    str 5,a1

The instruction generated for a real or cset literal has an argument that is the address of a data block for the corresponding value. Such blocks are in the data region generated by the linker:

3 5

100.2
    real a2

'aeiou'
    cset a3

C.3 Keywords
The instruction generated for most keywords results in a call to a C function that pushes a descriptor for the keyword onto the stack. The argument is an index that identifies the keyword. For example, &date is keyword 4:
&date
    keywd 4

Some keywords correspond directly to virtual machine instructions. Examples are &null and &fail:
&null
    pnull
&fail
    efail

C.4 Operators
The code generated for a unary operator first pushes a null descriptor, then evaluates the code for the argument, and finally executes a virtual machine instruction that is specific to the operator:
*expr
    pnull
    code for expr
    size

The code generated for a binary operator is the same as the code generated for a unary operator, except that there are two arguments:
expr1 + expr2
    pnull
    code for expr1
    code for expr2
    plus

An augmented assignment operator uses the virtual machine instruction dup to duplicate the result produced by its first argument:
expr1 +:= expr2
    pnull
    code for expr1
    dup
    code for expr2
    plus
    asgn

The difference between the code generated for left- and right-associative operators is illustrated by the following examples:

expr1 + expr2 + expr3
    pnull
    pnull
    code for expr1
    code for expr2
    plus
    code for expr3
    plus

expr1 := expr2 := expr3
    pnull
    code for expr1
    pnull
    code for expr2
    code for expr3
    asgn
    asgn

A subscripting expression is simply a binary operator with a distinguished syntax:
expr1[expr2]
    pnull
    code for expr1
    code for expr2
    subsc

A sectioning expression is a ternary operator:
expr1[expr2:expr3]
    pnull
    code for expr1
    code for expr2
    code for expr3
    sect

Sectioning expressions with relative range specifications are simply abbreviations. The virtual machine instructions for them include the instructions for performing the necessary arithmetic:
expr1[expr2 +: expr3]
    pnull
    code for expr1
    code for expr2
    dup
    code for expr3
    plus
    sect

A to-by expression is another ternary operator with a distinguished syntax:
expr1 to expr2 by expr3
    pnull
    code for expr1
    code for expr2
    code for expr3
    toby

3 !

If the by clause is omitted, an instruction that pushes a descriptor for the integer 1 is supplied:
expr1 to expr2
    pnull
    code for expr1
    code for expr2
    push1
    toby

The code generated for an explicit list is similar to the code generated for an operator. The instruction that constructs the list has an argument that indicates the number of elements in the list:
[expr1, expr2, expr3]
    pnull
    code for expr1
    code for expr2
    code for expr3
    llist 3

C.5 Calls
The code generated for a call also is similar to the code generated for an operator, except that a null descriptor is not pushed (it is provided by the invoke instruction). The argument of invoke is the number of arguments present in the call, not counting the zeroth argument, whose value is the procedure or integer that is applied to the arguments:
expr0(expr1, expr2)
    code for expr0
    code for expr1
    code for expr2
    invoke 2

In a mutual evaluation expression in which the zeroth argument of the "call" is omitted, the default value is -1, for which an instruction is provided:
(expr1, expr2, expr3)
    pushn1
    code for expr1
    code for expr2
    code for expr3
    invoke 3

C.6 Compound Expressions and Conjunction


The difference between a compound expression and a conjunction expression is illustrated by the following examples. Note that the code generated for conjunction is considerably simpler than that generated for a compound expression, since no separate expression frames are needed:

3 "

{expr1; expr2; expr3}
    mark L1
    code for expr1
    unmark
L1:
    mark L2
    code for expr2
    unmark
L2:
    code for expr3

expr1 & expr2 & expr3
    code for expr1
    pop
    code for expr2
    pop
    code for expr3

C.7 Selection Expressions


In the code generated for an if-then-else expression, the control expression is bounded and has an expression frame of its own:
if expr1 then expr2 else expr3
    mark L1
    code for expr1
    unmark
    code for expr2
    goto L2
L1:
    code for expr3
L2:
If the else clause is omitted, mark0 is used, so that if the control expression fails, this failure is transmitted to the enclosing expression frame:
if expr1 then expr2
    mark0
    code for expr1
    unmark
    code for expr2

The code generated for a case expression is relatively complicated. As for similar control structures, the control expression is bounded. The result it produces is placed on the top of the stack by the eret instruction, which saves the result of evaluating expr1, removes the current expression frame, and then pushes the saved result on the top of the stack. The ccase instruction pushes a null descriptor onto the stack and duplicates the descriptor just below the current efp on the top of the stack. This has the effect of providing a null descriptor and the first argument for the equivalence comparison operation performed by eqv. The second argument of eqv is provided by the code for the selector clause. The remainder of the code for a case clause removes the current expression frame marker, in case the comparison succeeds, and evaluates the selected expression:

3 #

case expr1 of {
   expr2 : expr3
   expr4 : expr5
   default: expr6
}
    mark0
    code for expr1
    eret
    mark L2
    ccase
    code for expr2
    eqv
    unmark
    pop
    code for expr3
    goto L1
L2:
    mark L3
    ccase
    code for expr4
    eqv
    unmark
    pop
    code for expr5
    goto L1
L3:
    pop
    code for expr6
L1:

C.8 Negation
The not control structure fails if its argument succeeds but produces the null value if its argument fails:
not expr
    mark L1
    code for expr
    unmark
    efail
L1:
    pnull

C.9 Generative Control Structures


If the first argument of an alternation expression produces a result, esusp produces a generator frame for possible resumption and duplicates the surrounding expression frame on the top of the stack. The result of the first argument is then pushed on the top of the stack, so that it looks as if the first argument merely produced a result. The second argument is then bypassed. When the first argument does not produce a result, its expression frame is removed, leaving the second argument to be evaluated:

3!&

expr1 | expr2
    mark L1
    code for expr1
    esusp
    goto L2
L1:
    code for expr2
L2:

Since alternation is treated as a binary operation, a succession of alternations produces the following code:
expr1 | expr2 | expr3
    mark L1
    code for expr1
    esusp
    goto L2
L1:
    mark L3
    code for expr2
    esusp
    goto L2
L3:
    code for expr3
L2:
Repeated alternation is complicated by the special treatment of the case in which its argument does not produce a result. If it does not produce a result, the failure is transmitted to the enclosing expression frame, since the failure ipc is 0. However, if it produces a result, the failure ipc is changed by chfail so that subsequent failure causes transfer to the beginning of the repeated alternation expression. The esusp instruction produces the same effect as that for regular alternation. Note that changing the failure ipc only affects the expression frame marker on the stack. When mark is executed again, a new expression frame marker with a failure ipc of 0 is created.
|expr
L1:
    mark0
    code for expr
    chfail L1
    esusp
In the limitation control structure, the normal left-to-right order of evaluation is reversed and the limiting expression is evaluated first. The limit instruction checks that the value is an integer and pushes it. It then creates an expression frame marker with a zero failure ipc. Thus, the limit is always one descriptor below the expression marker created by the subsequent mark instruction. The lsusp instruction is similar to the esusp instruction, except that it checks the limit. If the limit is zero, it fails instead of suspending. Otherwise, the limit is decremented:
expr1 \ expr2
    code for expr2
    limit
    code for expr1
    lsusp

3!1

C.10 Loops
The code generated for a repeat loop assures that the expression frame is handled uniformly, regardless of the success or failure of the expression:
repeat expr
L1:
    mark L1
    code for expr
    unmark
    goto L1
A while loop, on the other hand, transmits failure to the enclosing expression frame if its control expression fails. Note that both expr1 and expr2 are evaluated in separate expression frames:
while expr1 do expr2
L1:
    mark0
    code for expr1
    unmark
    mark L1
    code for expr2
    unmark
    goto L1
If the do clause is omitted, the generated code is similar to that for a repeat loop, except for the argument of mark:
while expr
L1:
    mark0
    code for expr
    unmark
    goto L1
An until loop simply reverses the logic of a while loop:
until expr1 do expr2
L1:
    mark L2
    code for expr1
    unmark
    efail
L2:
    mark L1
    code for expr2
    unmark
    goto L1
The every-do control structure differs from the while-do control structure in that when its control expression produces a result, its expression frame is not removed. Instead, the result is discarded by pop, and the do clause is evaluated in its own expression frame. The efail instruction forces the resumption of a suspended generator that may have been produced by an esusp instruction in the code for expr1:

3!2

every expr1 do expr2
    mark0
    code for expr1
    pop
    mark0
    code for expr2
    unmark
    efail

/rea0s from loops normally occ r in the conte.t of other e.pressions( In the follo+ing e.ample, the brea0 e.pression remo)es the e.pression frame corresponding to the repeat control str ct re, e)al ates its arg ment e.pression, and then transfers to a point beyond the end of the loop@ repeat e.pr1 O brea0 e.pr2 &1@ mar0 &1 mar0 &3 code for expr1 es sp goto &< &3@ nmar0 code for expr2 goto &2 &<@ nmar0 goto &2@ &i0e brea0, ne.t normally occ rs in the conte.t of other e.pressions( In the follo+ing e.ample, ne.t transfers control from a selection e.pression to the beginning of the loop@ +hile e.pr1 do if e.pr2 then ne.t else e.pr3 &1@ mar00 code for expr1 nmar0 mar0 &1 mar0 &< code for expr2 nmar0 goto &2 &<@ code for expr3 &2@ nmar0 goto &1 &1

C.11 String Scanning


String scanning is a control structure, rather than an operator, since the values of &subject and &pos must be saved and new values established before the second argument is evaluated. This is accomplished by bscan. The instruction bscan saves the current values of

3!3

&subject and &pos and establishes their new values before expr2 is evaluated. escan restores their values prior to the execution of bscan:
expr1 ? expr2
    code for expr1
    bscan
    code for expr2
    escan

Augmented string scanning is similar to other augmented operations, but it differs in that the string scanning operation does not push a null value on the stack. The instruction sdup therefore is slightly different from dup, which is used in other augmented assignment operations:
expr1 ?:= expr2
    pnull
    code for expr1
    sdup
    bscan
    code for expr2
    escan
    asgn

C.12 Procedure Returns


The code generated for a return expression consists of the pret instruction. However, it allows for failure of the argument of return, which is equivalent to fail:
return expr
    mark L1
    code for expr
    pret
L1:
    pfail
fail
    pfail

The code generated for the suspend expression is analogous to the code generated for alternation, except that the result is returned from the current procedure. The efail instruction causes subsequent results to be produced if the call is resumed:
suspend expr
    mark0
    code for expr
    psusp
    efail

C.13 Co-Expression Creation


The first instruction in the code generated for a create expression is a transfer around the code that is executed when the resulting co-expression is activated. The create instruction constructs a descriptor that points to the co-expression whose code is at the label given in its argument and pushes this descriptor on the stack. When the co-expression is activated the first time, evaluation starts at the label stored in the co-expression. The result that is on the top of the stack is popped, since transmission of a result to the first activation of a co-expression is meaningless. If expr produces a result, coret returns that result to the activating co-expression. If expr fails, cofail signals failure to the activating co-expression:

3!4

create expr
    goto L3
L1:
    pop
    mark L2
    code for expr
    coret
    efail
L2:
    cofail
    goto L2
L3:
    create L1

3!5

Appendix D: Adding Functions and Data Types


Icon is designed so that new functions and data types can be added with comparative ease. Such additions require changes only to the run-time system; the translator and linker are not affected. This appendix provides some guidelines for modifying the Icon run-time system and lists useful macro definitions and support routines. It is designed to be read in conjunction with the source code for the implementation. The material included here only touches on the possibilities. There is no substitute for actually implementing new features and spending time studying the more intricate parts of the Icon system.

D.1 File Organization


The Icon system is organized in a hierarchy. Under UNIX, the Icon hierarchy is rooted at v6 and is usually located at /usr/icon/v6. For other operating systems, Icon may be named differently. The v6 directory has several subdirectories that contain source code, test programs, documents, and so forth. The source code is in v6/src. There are five subdirectories in src:

h        common header files
icont    command processor
iconx    run-time system
link     linker
tran     translator

The subdirectory h holds header files that are included by files in the other subdirectories. The file h/rt.h is particularly important, since it contains most of the definitions and declarations used in the run-time system. The rest of the code related to the run-time system is in the subdirectory iconx. The first letters of files in this subdirectory indicate the nature of their contents. Files that begin with the letter f contain code for functions, while files that begin with o contain code for operators. Code related directly to the interpretive process is in files that begin with the letter i. "Library" routines for operations such as list construction that correspond to virtual machine instructions are in files that begin with the letter l. Finally, files that begin with the letter r hold run-time support routines. Within each category, routines are grouped by functionality. For example, string construction functions such as map are in fstr.c, while storage allocation and garbage collection routines are in rmemmgt.c.

D.2 Adding Functions


There are several conventions and rules of protocol that must be followed in writing a new function. The situations that arise most frequently are covered in the following sections. The existing functions in f files in iconx provide many examples to supplement the information given here.

3!

D.2.1 Function Declarations A function begins with a call of the macro FncDcl(name, n), where name is the name of the function as it is called in a source-language program, and n is the number of arguments for the function. For example,
FncDcl(map,3)

appears at the beginning of the function map. This macro declares the procedure block for the function and provides the beginning of the declaration of a C function for the code that follows. The value of n appears in the procedure block and is used to assure that the number of arguments on the interpreter stack when the function is called is the same as the number of arguments that the function expects. See Sec. 10.3. An X is prepended to the name given to avoid a collision with the names of other C routines in the run-time system. Thus, the C function that implements map is named Xmap. Although the Icon function map has three arguments, the corresponding C function has only one: cargp, which is a pointer to an array of descriptors on the interpreter stack. For example, FncDcl(map, 3) generates
Xmap(cargp) register struct descrip *cargp;

Other macros are provided for referencing the descriptors: Arg0 is the descriptor into which the result of a function is placed before it returns, Arg1 is the first descriptor argument in the call of the function, Arg2 is the second descriptor argument, and so on. These macros conceptually refer to the arguments in a source-language call of the function. It is never necessary (or desirable) to refer to cargp directly. Note that the descriptor at Arg0 initially points to the procedure block for the function (see Sec. 10.1). It is fair to assume that Arg1, Arg2, ..., Argi, where i arguments are specified in the declaration, contain valid descriptors. Nothing can be assumed about the nature of these descriptors, other than that they represent valid source-language values. Similarly, a function must place a valid descriptor in Arg0 before returning, overwriting the procedure descriptor. The macros described previously allow functions to be written without worrying about the details of the interpreter stack. It is not important to know how these macros are actually defined; it is best to think of them in terms of the higher-level concepts they embody. D.2.2 Returning from a Function A function returns control to the interpreter by use of one of three macros, Return, Suspend, or Fail, depending on whether the function returns, suspends, or fails, respectively. Return and Fail return codes that the interpreter uses to differentiate between the two situations. Suspend returns control to the interpreter by calling it, as described in Sec. 9.3. The use of Return is illustrated by the following trivial function that simply returns its argument:
FncDcl(idem, 1) { Arg0 = Arg1; Return; }

For example,

3!!

write(idem("hello"))

writes hello. The use of Suspend and Fail is illustrated by the following function, which generates its first and second arguments in succession:
FncDcl(gen2,2) { Arg0 = Arg1; Suspend; Arg0 = Arg2; Suspend; Fail; }

For example,
every write(gen2("hello", "there"))

writes
hello
there

As illustrated previously, Fail is used when there is not another result to produce. It is safe to assume that Arg0, Arg1, ... are intact when the function is resumed to produce another result. Most functions have a fixed number of arguments. Only write, writes, and stop in the standard Icon repertoire can be called with an arbitrary number of arguments. For a function that can be called with an arbitrary number of arguments, an alternative declaration macro, FncDclV(name), is used. When this macro is used, the function is called with two arguments: the number of arguments in the call and a pointer to the corresponding array of descriptors. For example, FncDclV(write) generates
Xwrite(nargs, cargp) int nargs; register struct descrip *cargp;

Within such a function, Arg0 refers to the return value as usual, but the arguments are referenced using the macro Arg(n). For example, a function that takes an arbitrary number of arguments and suspends with them as values in succession is
FncDclV(gen) { register int n; for (n = 1; n <= nargs; n++) { Arg0 = Arg(n); Suspend; } Fail; }

For example,
every write(gen("hello","there","!"))

writes
hello
there
!

3!"

Note the use of Fail at the end of the function; the omission of Fail would be an error, since returning by flowing off the end of the function would not provide the return code that the interpreter expects. D.2.3 Type Checking and Conversion Some functions need to perform different operations, depending on the types of their arguments. An example is type(x):
FncDcl(type, 1)
   {
   if (Qual(Arg1)) {
      StrLen(Arg0) = 6;
      StrLoc(Arg0) = "string";
      }
   else {
      switch (Type(Arg1)) {
         case T_Null:
            StrLen(Arg0) = 4;
            StrLoc(Arg0) = "null";
            break;
         case T_Integer:
         case T_Long:
            StrLen(Arg0) = 7;
            StrLoc(Arg0) = "integer";
            break;
         case T_Real:
            StrLen(Arg0) = 4;
            StrLoc(Arg0) = "real";
            break;
         }
      }
   Return;
   }

As indicated by this function, the d-word serves to differentiate between types, except for strings, which require a separate test. For most functions, arguments must be of a specific type. As described in Sec. 12.1, type conversion routines are used for this purpose. For example, the function tab(i) requires that i be an integer. It begins as follows:
FncDcl(tab, 1)
   {
   register word i, j;
   word t, oldpos;
   long l1;

   /*
    * Arg1 must be an integer.
    */
   if (cvint(&Arg1, &l1) == CvtFail)
      runerr(101, &Arg1);

Note that cvint is called with the addresses of Arg1 and l1. If the conversion is successful, the resulting integer is assigned to l1. As indicated by this example, it is the responsibility of a function to terminate execution by calling runerr if a required conversion cannot be made.

3!#

The routine cvstr, which converts values to strings, requires a buffer, which is supplied by the routine that calls it. See Sec. 4.4.4. This buffer must be large enough to hold the longest string that can be produced by the conversion of any value. This size is given by the defined constant MaxCvtLen. For example, the function to reverse a string begins as follows:
FncDcl(reverse, 1) { register char c, *floc, *lloc; register word slen; char sbuf[MaxCvtLen]; extern char *alcstr(); /* * Make sure that Arg1 is a string. */ if (cvstr(&Arg1, sbuf) == CvtFail) runerr(103, &Arg1);

The buffer is used only if a nonstring value is converted to a string. In this case, Arg1 is changed to a qualifier whose v-word points to the converted string in sbuf. This string does not necessarily begin at the beginning of sbuf. In any event, after a successful call to cvstr, the argument is an appropriate qualifier, regardless of whether a conversion actually was performed. D.2.4 Constructing New Descriptors Some functions need to construct new descriptors to return in Arg0. Sometimes it is convenient to construct a descriptor by assignment to its d- and v-words. Various macros are provided to simplify these assignments. As given in the function type previously, StrLen and StrLoc can be used to construct a qualifier. For example, to return a qualifier for the string "integer", the following code suffices:
StrLen(Arg0) = 7; StrLoc(Arg0) = "integer"; Return;

Here, the returned qualifier points to a statically allocated C string. There also are macros and support routines for constructing certain kinds of descriptors. For example, the macro
Mkint(i, dp);

constructs an integer descriptor containing the integer i in the descriptor pointed to by dp. The definition of Mkint depends on the word size of the computer. On 32-bit computers, Mkint simply produces assignments to the d-word and v-word of the descriptor pointed to by dp. On computers with 16-bit words, which have both T_Integer and T_Long forms of integers, Mkint produces a call to a support routine. D.2.5 Default Values Many functions specify default values for null-valued arguments. There are support routines for providing default values. For example,
defstr(Arg3, sbuf, &q);

changes Arg3 to the string given by the qualifier q in case Arg3 is null-valued. If Arg3 is not null-valued, however, its value is converted to a string, if possible, by defstr. If this is not possible, defstr terminates execution with an error message.

3"&

D.2.6 Storage Allocation Functions that construct new data objects often need to allocate storage. Allocation is done in the allocated string region or the allocated block region, depending on the nature of the object. Support routines are provided to perform the actual allocation. As mentioned in Sec. 11.4, predictive need requests must be made before storage is actually allocated. The functions strreq(i) and blkreq(i) request i bytes of storage in the allocated string and block regions, respectively. Such a request generally should be made as soon as an upper bound on the amount of storage needed is known. It is not necessary to know the exact amount, but the amount requested must be at least as large as the amount that actually will be allocated. For example, the function reads(f, i) requests i bytes of string storage, although the string actually read may be shorter. String Allocation. The function alcstr(s, i) copies i bytes starting at s into the allocated string region and returns a pointer to the beginning of the copy. For example, a function double(s) that produces the concatenation of s with itself is written as follows:
FncDcl(double, 1) { register int glen; char sbuf[MaxCvtLen]; extern char *alcstr(); if (cvstr(&Arg1, sbuf) == NULL) runerr(103, &Arg1); glen = StrLen(Arg1); strreq(2 * glen); StrLen(Arg0) = 2 * glen; StrLoc(Arg0) = alcstr(StrLoc(Arg1), glen); alcstr(StrLoc(Arg1), glen); Return; }

If the first argument of alcstr is NULL, instead of being a pointer to a string, the space is allocated and a pointer to the beginning of it is returned, but nothing is copied into the space. This allows a function to construct a string directly in the allocated string region. If a string to be returned is in a buffer as a result of conversion from another type, care must be taken to copy this string into the allocated string region---otherwise the string in the buffer will be overwritten on subsequent calls. Copying such strings is illustrated by the function string(x) given in Sec. 12.1. Block Allocation. The routine alcblk(i) allocates i bytes in the allocated block region and returns a pointer to the beginning of the block. The argument of alcblk must correspond to a whole number of words. There are run-time support routines for allocating various kinds of blocks. These routines, in turn, call alcblk. Such support routines generally fill in part of the block as well. For example, alccset(i) allocates a block for a cset, fills in the title and size words, and zeroes the bits for the cset:
struct b_cset *alccset(size) int size; { register struct b_cset *blk; register i; extern union block *alcblk();

3"1

blk = (struct b_cset *)alcblk((word)sizeof(struct b_cset) , T_Cset); blk->size = size; /* * Zero the bit array. */ for (i = 0; i < CsetSize; i++) blk->bits[i] = 0; return blk; }

See Sec. D.5.5 for a complete list of block-allocation functions. D.2.7 Storage Management Considerations In addition to assuring that predictive need requests are made before storage is allocated, it is essential to assure that all descriptors contain valid data at any time a garbage collection may occur, that all descriptors are accessible to the garbage collector, and that all pointers to allocated data are in the v-words of descriptors. Normally, all the descriptors that a function uses are on the interpreter stack and are referenced as Arg0, Arg1, .... Such descriptors are processed by the garbage collector. Occasionally, additional descriptors are needed for intermediate computations. If such descriptors contain pointers in their v-words, it is not correct to declare local descriptors, as in
FncDcl(mesh,2) { struct descrip d1, d2;

The problem with this approach is that d1 and d2 are on the C stack and the garbage collector has no way of knowing about them. However, since all descriptors on the interpreter stack are accessible to the garbage collector, intermediate computations can be performed on descriptors on the interpreter stack. Extra descriptors for this purpose can be provided by increasing the number of arguments specified for the function. Thus,
FncDcl(mesh,4)

makes Arg3 and Arg4 available for intermediate computations. The initial values of Arg3 and Arg4 will be null because of argument adjustment performed by invoke unless mesh is called with extra arguments. Garbage collection can occur only during a predictive need request. However, a predictive need request can occur between the time a function suspends and the time it is resumed to produce another result. Consequently, if a pointer is kept in a C variable in a loop that is producing results by suspending, the pointer may be invalid when the function is resumed. Instead, the pointer should be kept in the v-word of a descriptor that is accessible to the garbage collector. D.2.8 Error Termination An Icon program may terminate abnormally for two reasons: as the result of a source-language programming error (such as an invalid type in a function call), or as a result of an error detected in the Icon system itself (such as a descriptor that should have been dereferenced but was not). In case a source-language error is detected, execution is terminated by a call of the form
runerr(i, &d);

3"2

where i is an error message number and d is the descriptor for the offending value. If there is no specific offending value, the second argument is 0. The array of error message numbers and corresponding messages is contained in iconx/imain.c. If there is no appropriate existing error message, a new one can be added, following the guidelines given in Appendix D of Griswold and Griswold 1983. In theory, there should be no errors in the Icon system itself, but no large, complex software system is totally free of errors. Some situations are recognizable as being potential sources of problems in case data does not have the expected values. In such situations, especially during program development, it is advisable to insert calls of the function syserr, which terminates execution, indicating that an error was detected in the Icon system, and prints its argument as an indication of the nature of the error. It is traditional to use calls of the form
syserr("mesh: can't happen");

so that when, in fact, the "impossible" does happen, there is a reminder of human frailty. More informative messages are desirable, of course. D.2.9 Header Files If a new function is added to an existing f file in iconx, the necessary header files normally will be included automatically. If a new function is placed in a new file, that file should begin with
#include "../h/rt.h"

This header file includes three other header files: ../h/config.h (general configuration information), ../h/cpuconf.h (definitions that depend on the computer word size), and ../h/memsize.h (definitions that depend on the computer address space). All of these files contain appropriate information for the local installation, and no changes in them should be needed. In rare cases, it may be necessary to include other header files. For example, a function that deals directly with garbage collection might need to include iconx/gc.h. D.2.10 Installing a New Function Both the linker and the run-time system must know the names of all functions. This information is provided in the header file h/fdefs.h. In order to add a function, a line of the form
FncDef(name)

must be inserted in h/fdefs.h in proper alphabetical order. Once this insertion is made, the Icon system must be recompiled to take into account the code for the new function. The steps involved in recompilation vary from system to system. Information concerning recompilation is available in system-specific installation documents.

3"3

D.3 Adding Data Types


Adding a new data type is comparatively simple, although there are several places where changes need to be made. Failure to make all the required changes can produce mysterious bugs. D.3.1 Type Codes At present, type codes range from 0 to 18. Every type must have a distinct type code and corresponding definitions. These additions are made in h/rt.h. First, a T_ definition is needed. For example, if a Boolean type is added, a definition such as
#define T_Boolean 19

is needed. The value of MaxType, which immediately follows the type code definitions, must be increased to 19 accordingly. Failure to set MaxType to the maximum type code may result in program malfunction during garbage collection. See Sec. 11.3.2. Next a D_ definition is needed for the d-word of the new type. For a Boolean type, this definition might be
#define D_Boolean (T_Boolean | F_Nqual)

All nonstring types have the F_Nqual flag and their T_ type code. Types whose v-words contain pointers also have the F_Ptr flag. D.3.2 Structures A value of a Boolean type such as the one suggested previously can be stored in the d-word of its descriptor. However, most types contain pointers to blocks in their v-words. In this case, a declaration of a structure corresponding to the block must be added to h/rt.h. For example, a new rational number data type, with the type code T_Rational, might be represented by a block containing two descriptors, one for the numerator and one for the denominator. An appropriate structure declaration for such a block is
struct b_rational { int title; struct descrip numerator; struct descrip denominator; };

Since rational blocks are fixed in size, no size field is needed. However, a vector type with code T_Vector in which different vectors have different lengths needs a size field. The declaration for such a block might be
struct b_vector { int title; int blksize; struct descrip velems[1]; };

As mentioned in Sec. 4.4.2, the size of one for the array of descriptors is needed to avoid problems with C compilers. In practice, this structure conceptually overlays the allocated block region, and the number of elements varies from block to block. Any new structure declaration for a block must be added to the declaration union block in h/rt.h.

3"4

D.3.3 Information Needed for Storage Management All pointers to allocated data must be contained in the v-words of descriptors, since this is the only way the garbage collector can locate them. Furthermore, all non-descriptor data must precede any descriptors in a block. The amount of non-descriptor data, and hence the location of the first descriptor in a block, must be the same for all blocks of a given type. As described in Sec. 11.3.2, the garbage collector uses the array bsizes to determine the size of a block and the array firstd to determine the offset of the first descriptor in the block. These arrays are in iconx/rmemmgt.c. When a new data type is added, appropriate entries must be made in these arrays. Failure to do so may result in serious bugs that occur only in programs that perform garbage collection, and the symptoms may be mysterious. There is an entry in bsizes for each type code. If the type has no block, the entry is -1. If the type has a block of constant size, the entry is the size of the block. Otherwise, the entry is 0, indicating that the size is in the second word of the block. Thus, the entry for T_Boolean would be -1, the entry for T_Rational would be sizeof(struct b_rational), and the size for T_Vector would be 0. There is a corresponding entry in firstd for each type code that gives the offset of the first descriptor in its corresponding block. If there is no block, the entry is -1. If the block contains no descriptors, the entry is 0. For example, the entry for T_Boolean would be -1, the entry for T_Rational would be WordSize, and the entry for T_Vector would be 2*WordSize, where WordSize is a defined constant that is the number of bytes in a word. A third array, blknames, provides string names for all block types. These names are only used for debugging, and an entry should be made in blknames for each new data type. D.3.4 Changes to Existing Code In addition to any functions that may be needed for operating on values of a new data 
type, there are several functions and operators that apply to all data types and which may, therefore, need to be changed for any new data type. These are: *x, size of x (in iconx/omisc.c); copy(x), copy of x (in iconx/fmisc.c); image(x), string image of x (in iconx/fmisc.c); type(x), string name of type of x (in iconx/fmisc.c). There is not a concept of size for all data types. For example, a Boolean value presumably does not have a size, but the size of a vector presumably is the number of elements it contains. The size of a rational number is problematical. Modifications to *x are easy; see Sec. 4.4.4. There must be some provision for copying any value. For structures, such as vectors, physical copies should be made so that they are treated consonantly with other Icon structures. For other data types, the "copy" consists of simply returning the value and not making a physically distinct copy. This should be done for data types, such as Boolean, for which there are only descriptors and no associated blocks. Whether or not a copy of a block for a rational value should be made is a more difficult decision and depends on how such values are treated conceptually, at the source-language level. It is, of course, easiest not to make a physical copy.

3"5

Some image must be provided for every value. This image should contain enough information to distinguish values of different types and, where possible, to provide some useful additional information about the specific value. The amount of detail that it is practical to provide in the image of a value is limited by the fact that the image is a string that must be placed in the allocated string region. The type must be provided for all values and should consist of a simple string name. For example, if x is a Boolean value, type(x) should produce "boolean". The coding for type is trivial; see Sec. D.2.3. There also are several run-time support routines that must be modified for any new type:
outimage    image for tracing (in iconx/rmisc.c)
order       order for sorting (in iconx/rcomp.c)
anycmp      comparison for sorting (in iconx/rcomp.c)
equiv       equivalence comparison (in iconx/rcomp.c)

The image produced for tracing purposes is similar to that produced by image and must be provided for all data types. However, outimage produces output and is not restricted to constructing a string in allocated storage. It therefore can be more elaborate and informative. There must be some concept of sorting order for every Icon value. There are two aspects to sorting: the relative order of different data types and the ordering among values of the same type. The routine order produces an integer that corresponds to the order of the type. If the order of a type is important with respect to other types, this matter must be given some consideration. For example, a rational number probably belongs among the numeric types, which, in Icon, sort before structure types. On the other hand, it probably is not important whether vectors come before or after lists. The routine anycmp compares two values; if they have the same order, as defined previously, anycmp determines which is the "smaller." For example, Boolean "false" might (or might not) come before "true," but some ordering between the two should be provided. On the other hand, order among vectors probably is not important (or well-defined), and they can be lumped with the other structures in anycmp, for which ordering is arbitrary. Sometimes ordering can be quite complicated; a correct ordering of rational numbers is nontrivial. The routine equiv is used in situations, such as table subscripting and case expressions, to determine whether two values are equivalent in the Icon sense. Generally speaking, two structure values are considered to be equivalent if and only if they are identical. This comparison is included in equiv in a general way. For example, equiv need not be modified for vectors. Similarly, for data types that have no corresponding blocks, descriptor comparison suffices; equiv need not be modified for Boolean values either. However, determining the equivalence of numeric values, such as rational numbers, 
requires some thought. D.4 DEFINED CONSTANTS AND MACROS Defined constants and macros are used heavily in Icon to parameterize its code for different operating systems and computer architectures and to provide simple, high-level constructions for commonly occurring code sequences that otherwise would be complex and obscure.

3"

These defined constants and macros should be used consistently when making additions to Icon instead of using ad hoc constructions. This improves portability, readability, and consistency. Learning the meanings and appropriate use of the existing defined constants and macro definitions requires investment of time and energy. Once learned, however, coding is faster, simpler, and less prone to error. D.4.1 Defined Constants The following defined constants are used frequently in the run-time system. This list is by no means exhaustive; for specialized constants, see existing functions.
CsetSize        number of words needed for 256 bits
LogHuge         one plus the maximum base-10 exponent of a C double
LogIntSize      base-2 logarithm of number of bits in a C int
MaxCvtLen       length of the longest possible string obtained by conversion
MaxLong         largest C long
MaxShort        largest C short
MaxStrLen       longest possible string
MinListSlots    minimum number of slots in a list-element block
MinLong         smallest C long
MinShort        smallest C short
WordSize        number of bytes in a word

D.4.2 Macros The following macros are used frequently in the run-time system. See h/rt.h and iconx/gc.h for the definitions, and see existing routines for examples of usages.
Arg(n)             nth argument to function
ArgType(n)         d-word of nth argument to function
ArgVal(n)          integer value of v-word of nth argument to function
BlkLoc(d)          pointer to block from v-word of d
BlkSize(cp)        size of block pointed to by cp
BlkType(cp)        type code of block pointed to by cp
ChkNull(d)         true if d is a null-valued descriptor
CsetOff(b)         offset in a word of cset bit b
CsetPtr(b, c)      address of word c containing cset bit b
DeRef(d)           dereference d
EqlDesc(d1, d2)    true if d1 and d2 are identical descriptors
GetReal(dp, r)     get real number into r from descriptor pointed to by dp
IntVal(d)          integer value of v-word of d
Max(i, j)          maximum of i and j
Min(i, j)          minimum of i and j
Mkint(i, dp)       make integer from i in descriptor pointed to by dp
Offset(d)          offset from d-word of variable descriptor d
Pointer(d)         true if v-word of d is a pointer
Qual(d)            true if d is a qualifier
Setb(b, c)         set bit b in cset c
SlotNum(i, j)      slot for hash number i given j total slots
StrLen(q)          length of string referenced by q
StrLoc(q)          location of string referenced by q

3"!

Testb(b, c)        true if bit b in cset c is one
Tvar(d)            true if d is a trapped variable
TvarLoc(d)         pointer to trapped variable from v-word of d
Type(d)            type code in d-word of d
Var(d)             true if d is a variable descriptor
VarLoc(d)          pointer to value descriptor from v-word of d
Vsizeof(x)         size of structure x less variable array at end
Vwsizeof(x)        size of structure x in words less variable array at end
Wsizeof(x)         size of structure x in words

D.5 Support Routines


There are many support routines for performing tasks that occur frequently in the Icon run-time system. Most of these routines are in files in iconx that begin with the letter r. The uses of many of these support routines have been illustrated earlier; what follows is a catalog for reference. D.5.1 Comparison The following routines in iconx/rcomp.c perform comparisons: anycmp(dp1, dp2): Compare the descriptors pointed to by dp1 and dp2 as Icon values in sorting order, returning a value greater than 0, 0, or less than 0 depending on whether the descriptor pointed to by dp1 is respectively greater than, equal to, or less than the descriptor pointed to by dp2. equiv(dp1, dp2): Test for equivalence of descriptors pointed to by dp1 and dp2, returning 1 if equivalent and 0 otherwise.

3""

3"#

Appendix E: Projects

3#&

Appendix F: Solutions to Selected Exercises

3#1

Appendix G: The RTL Run-Time Language


This appendix contains a description of the language used to implement the run-time operations of the Icon compiler system. Chapter 5 provides a description of the design goals of the implementation language and an introduction to it. Some of the design decisions for the language were motivated by optimizations planned for the future, such as constant folding of csets. The use of these features is presented as if the optimizations were implemented; this insures that the optimizations will be supported by the run-time system when they are implemented. This appendix is adapted from the reference manual for the language [.ipd79.]. The translator for the implementation language is the program rtt. An rtt input file may contain operation definitions written in the implementation language, along with C definitions and declarations. Rtt has a built-in C preprocessor based on the ANSI C Standard, but with extensions to support multi-line macros with embedded preprocessor directives [.ipd65.]. Rtt prepends a standard include file, grttin.h, on the front of every implementation language file it translates. The first part of this appendix describes the operation definitions. C language documentation should be consulted for ordinary C grammar. The extensions to ordinary C grammar are described in the latter part of the appendix. The grammar for the implementation language is presented in extended BNF notation. Terminal symbols are set in Helvetica. Non-terminals and meta-symbols are set in Times-Italic. In addition to the usual meta-symbols, ::= for "is defined as" and | for "alternatives", brackets around a sequence of symbols indicates that the sequence is optional, braces around a sequence of symbols followed by an asterisk indicates that the sequence may be repeated zero or more times, and braces followed by a plus indicates that the enclosed sequence may be repeated one or more times.

G.1 Operation Documentation


An operation definition can be preceded by an optional description in the form of a C string literal.
documented-definition ::= [ C-string-literal ] operation-definition

The use of a C string allows an implementation file to be run through the C preprocessor without altering the description. The preprocessor concatenates adjacent string literals, allowing a multi-line description to be written using multiple strings. Alternatively, a multi-line description can be written using '\' for line continuation. This description is stored in the operation data base where it can be extracted by documentation generation programs. These documentation generators produce formatted documentation for Icon programmers and for C programmers maintaining the Icon implementation. The documentation generators are responsible for inserting newline characters at reasonable points when printing the description.

G.2 Types of Operations


Rtt can be used to define the built-in functions, operators, and keywords of the Icon language. (Note that there are some Icon constructs that fall outside this implementation

3#2

specification system. These include control structures such as string scanning and limitation, along with record constructors and field references.)
operation-definition ::= function result-seq identifier ( [ param-list ] ) [ declare ] actions end | operator result-seq op identifier ( [ param-list ] ) [ declare ] actions end | keyword result-seq identifier actions end | keyword result-seq identifier const key-const end result-seq ::= { length , length [ + ] } | { length [ + ] } | { } length ::= integer | *

result-seq indicates the minimum and maximum length of the result sequence of an operation (the operation is treated as if it is used in a context where it produces all of its results). For example, addition always produces one result so its result-seq is {1, 1}. If the minimum and maximum are the same, only one number need be given, so the result-seq for addition can be coded as {1}. A conditional operation can produce either no results (that is, it can fail) or it can produce one result, so its result-seq is {0, 1}. A length of * indicates unbounded, so the result-seq of ! is indicated by {0, *}. An * in the lower bound means the same thing as 0, so {0, *} can be written as {*, *}, which simplifies to {*}. A result-seq of {} indicates no result sequence. This is not the same as a zero-length result sequence, {0}; an operation with no result sequence does not even fail. exit is an example of such an operation. A + following the length(s) in a result-seq indicates that the operation can be resumed to perform some side effect after producing its last result. All existing examples of such operations produce at most one result, performing a side effect in the process. The side effect on resumption is simply an undoing of the original side effect. An example of this is tab, which changes &pos as the side effect. For functions and keywords, identifier is the name by which the operation is known within the Icon language (for keywords, identifier does not include the &). New functions and keywords can be added to the language by simply translating implementations for them. For operations, op is (usually) the symbol by which the operation is known within the Icon language and identifier is a descriptive name. It is possible to have more than one operation with the same op as long as they have different identifiers and take a different number of operands. In addition to translating the implementation for an operator, adding a new operator requires updating iconc's lexical analyzer and parser 
to know about the symbol (in reality, an operator definition may be used for operations with non-operator syntax, in which case any syntax may be used; iconc's code generator identifies the operation by the type of node put in the parse tree by a parser action). In all cases, the identifier is used to construct the name(s) of the C function(s) which implement the operation. A param-list is a comma separated list of parameter declarations. Some operations, such as the write function, take a variable number of arguments. This is indicated by appending a pair of brackets enclosing an identifier to the last parameter declaration. This last parameter is then an array containing the tail of the argument list, that is, those arguments not taken up by the preceding parameters. The identifier in brackets represents the length of the tail and has a type of C integer.
param-list ::= param { , param } [ [ identifier ] ]

3#3

Most operations need their arguments dereferenced. However, some operations, such as assignment, need undereferenced arguments and a few need both dereferenced and undereferenced versions of an argument. There are forms of parameter declarations to match each of these needs.
param ::= identifier | underef identifier | underef identifier -> identifier

A simple identifier indicates a dereferenced parameter. underef indicates an undereferenced parameter. In the third form of parameter declaration, the first identifier represents the undereferenced form of the argument and the second identifier represents the dereferenced form. This third form of declaration may not be used with the variable part of an argument list. These identifiers are of type descriptor. Descriptors are implemented as C structs. See Chapter 4 for a detailed explanation of descriptors. Examples of operation headers:
detab(s,i,...) - replace tabs with spaces, with stops at columns indicated. function{1} detab(s, i[n]) actions end x <-> y -swap values of x and y. Reverses swap if resumed. operator{0,1+} <-> rswap(underef x -> dx, underef y -> dy) declare actions end &fail -just fail keyword{0} fail actions end

G.3 Declare Clause


Some operations need C declarations that are common to several actions. These can be declared within the declare clause.
declare ::= declare { C declarations }

These may include tended declarations, which are explained below in the section on extensions to C. If a declaration can be made local to a block of embedded C code, it is usually better to put it there than in a declare clause. This is explained below in the discussion of the body action. Constant Keywords Any keyword can be implemented using general actions. However, for constant keywords, iconc can sometimes produce more efficient code if it treats the keyword as a literal constant. Therefore, a special declaration is available for declaring keywords that can be represented as Icon literals. The constant is introduced with the word const and can be one of four literal types.
key-const ::= string-literal | cset-literal | integer-literal | real-literal

When using this mechanism, it is important to be aware of the fact that rtt tokenizes these literals as C literals, not as Icon literals. The contents of string literals and character literals

3#4

(used to represent cset literals) are not interpreted by rtt except for certain situations in string concatenation (see [.ipd65.]). They are simply stored, as is, in the data base. This means that literals with escape sequences can be used even when C and Icon would give them different interpretations. However, C does not recognize control escapes, so '\^'', which is a valid Icon literal, will result in an error message from rtt, because the second quote ends the literal, leaving the third quote dangling. Only decimal integer literals are allowed.

9.( Actions
All operations other than constant keywords are implemented with general actions. Actions fall into four categories: type checking and conversions, detail code expressed in extended C, abstract type computations, and error reporting.
actions ::= { action }* action ::= checking-conversions | detail-code | abstract { type-computations } | runerr( msg_number [ , descriptor ] ) [ ; ] { actions }

Type Checking and Conversions

The type checking and conversions are
checking-conversions ::= if type-check then action | if type-check then action else action | type_case descriptor of { { type-select }+ } | len_case identifier of { { integer : action }+ default : action } type-select ::= { type-name : }+ action | default : action

These actions specify run-time operations. These operations could be performed in C, but specifying them in the implementation language gives the compiler information it can use to generate better code. The if actions use the result of a type-check expression to select an action. The type_case action selects an action based on the type of a descriptor. If a type_case action contains a default clause, it must be last. type-select clauses must be mutually exclusive in their selection. The len_case action selects an action based on the length of the variable part of the argument list of the operation. The identifier in this action must be the one representing that length. A type-check can succeed or fail. It is either an assertion of the type of a descriptor, a conversion of the type of a descriptor, or a logical expression involving type-checks. Only limited forms of logical expressions are supported.
type-check ::= simple-check { && simple-check }* | ! simple-check simple-check ::= is: type-name ( descriptor ) | cnv: dest-type ( source [ , destination ] ) |

3#5

def: dest-type ( source , value [ , destination ] ) dest-type ::= cset | integer | real | string | C_integer | C_double | C_string | (exact)integer | (exact)C_integer | tmp_string | tmp_cset

The is check succeeds if the value of the descriptor is in the type indicated by type-name. Conversions indicated by cnv are the conversions between the Icon types of cset, integer, real, and string. Conversions indicated by def are the same conversions with a default value to be used if the original value is null. dest-type is the type to which a value is to be converted, if possible. cset, integer, real, and string constitute a subset of icon-type which is in turn a subset of type-name (see below). C_integer, C_string, and C_double are conversions to internal C types that are easier to manipulate than descriptors. Each of these types corresponds to an Icon type. A conversion to an internal C type succeeds for the same values for which a conversion to the corresponding Icon type succeeds. C_integer represents the C integer type used for integer values in the particular Icon implementation being compiled (typically, a 32-bit integer type). C_double represents the C double type. C_string represents a pointer to a null-terminated C character array. However, see below for a discussion of the destination for conversion to C_string. (exact) before integer or C_integer disallows conversions from reals or strings representing reals; that is, the conversion fails if the value being converted represents a real value. Conversion to tmp_string is the same as conversion to string (the result is a descriptor), except that the string is only guaranteed to exist for the lifetime of the operation (the lifetime of a suspended operation extends until it can no longer be resumed). Conversion to tmp_string is generally less expensive than conversion to string and is never more expensive, but the resulting string must not be exported from the operation. tmp_cset is analogous to tmp_string. The source of the conversion is the descriptor whose value is to be converted. If no destination is specified, the conversion is done "in-place". However, it may not actually be possible to do an argument conversion in the argument's original location, so the argument may be copied to another location as part of the conversion. Within the scope of the conversion, the parameter name refers to this new location. The scope of a conversion is usually only important for conversions to C types; the run-time system translator and the Icon compiler try to keep the movement of descriptor parameters transparent (see below for more details). All elements of the variable part of an argument list must be descriptors. Therefore, when an element is converted to a C type, an explicit location must be given for the destination. The destinations for conversions to cset, integer, real, string, (exact)integer, tmp_string, and tmp_cset must be descriptors. The destinations for conversions to C_integer, C_double, and (exact)C_integer must be the corresponding C types. However, the destination for conversion to C_string must be tended. If the destination is declared as

3#

"tended char *", then the dword (string length) of the tended location will be set, but the operation will not have direct access to it. The variable will look like a "char *". Because the operation does not have access to the string length, it is not a good idea to change the pointer once it has been set by the conversion. If the destination is declared as a descriptor, the operation has access to both the pointer to the string and the string's length (which includes the terminating null character). If a parameter is converted to C_string and no explicit destination is given, the parameter will behave like a "tended char *" within the scope of the conversion. The second argument to the def conversion is the default value. The default value may be any C expression that evaluates to the correct type. These types are given in the following chart.

cset:              struct b_cset
integer:           C_integer
real:              double
string:            struct descrip
C_integer:         C_integer
C_double:          double
C_string:          char *
tmp_string:        struct descrip
tmp_cset:          struct b_cset
(exact)integer:    C_integer
(exact)C_integer:  C_integer

The numeric operators provide good examples of how conversions are used:
operator{1} / divide(x, y) if cnv:(exact)C_integer(x) && cnv:(exact)C_integer(y) then actions else { if !cnv:C_double(x) then runerr(102, x) if !cnv:C_double(y) then runerr(102, y) actions } end

Within the code indicated by actions, x and y refer to C values rather than to the Icon descriptors of the unconverted parameters. The subject of any type check or type conversion must be an unmodified parameter. For example, once an in-place conversion has been applied to a parameter, another conversion may not be applied to the same parameter. This helps ensure that type computations in

3#!

iconc only involve the unmodified types of arguments, simplifying those computations. This restriction does not apply to type checking and conversions in C code.

Scope of Conversions

The following discussion is included mostly for completeness. The scope of conversions sounds complicated, but in practice problems seldom occur in code that "looks reasonable". If a problem does occur, the translator catches it. Normally, the intricacies of scope should be ignored and the person writing run-time routines should code conversions in a manner that seems natural. An "in-place" conversion of a parameter can create a scope for the parameter name separate from the one introduced by the parameter list. This is because conversions to C types may require the converted value to be placed in a different location with a different type. The parameter name is then associated with this new location. The original scope of a parameter starts at the beginning of the operation's definition. The scope of a conversion starts at the conversion. A scope extends through all code that may be executed after the scope's beginning, up to a runerr or a conversion that hides the previous scope (because the type checking portion of the implementation language does not contain loops or arbitrary gotos, scope can easily be determined lexically). The use of an in-place conversion in the first sub-expression of a conjunction, cnv1 && cnv2, has a potential for causing problems. In general, there is no way to know whether the first conversion will effectively be undone when the second conversion fails. If the first conversion is actually done in-place, the parameter name refers to the same location in both the success and failure scope of the conjunction, so the conversion is not undone. If the conversion is done into a separate location, the failure scope will refer to the original value, so the conversion will effectively be undone. Whether the conversion is actually done in-place depends on the context in which the operation is used. However, conversions to C_integer and C_double always preserve the original value, so there is no potential problem using them as the first argument to a conjunction, nor is there any problem using a non-conversion test there. An example of this uncertainty:
if cnv:string(s1) && cnv:string(s2) then { /* s1 and s2 both refer to converted values */ } else { /* s2 refers to the original value. s1 may refer to either the original or the converted value */ }

The translator issues a warning if there is a potential problem. It is possible for scopes to overlap; this happens because scopes start within conditional actions. In rare instances, executable code using the name may appear within this overlapping scope, as in the following example, which resembles code that might be found in the definition of a string analysis function such as find.
if is:null(s) then { if !def:C_integer(i, k_pos) then runerr(101, i) } else { if !def:C_integer(i, 1) then runerr(101, i)

3#"

actions

Here, actions occurs within the scope of both conversions. Note that actions is not in the scope of the original parameter i. This is because that scope is ended in each branch of the outer if by the conversions and the runerrs. If overlap does occur, the translator tries to ensure that the same location is used for the name in each scope. The only situation when it cannot do this is when the type of the location is different in each scope, for instance, one is a C_integer and the other is a C_double. If a name is referenced when there is conflicting scope, the translator issues an error message.

Type Names

The type-names represent types of Icon intermediate values, including variable references. These are the values that enter and leave an operation; "types" internal to data structures, such as list element blocks, are handled completely within the C code.
type-name ::= empty_type | icon-type | variable-ref icon-type ::= null | string | cset | integer | real | file | list | set | table | record | procedure | co_expression variable-ref ::= variable | tvsubs | tvtbl | kywdint | kywdpos | kywdsubj

The type-names are not limited to the first-class types of Icon's language definition. The type-names that do not follow directly from Icon types need further explanation. empty_type is the type containing no values and is needed for conveying certain information to the type inferencing system, such as an unreachable state. For example, the result type of stop is empty_type. It may also be used as the internal type of an empty structure. Contrast this with null, which consists of the null value. Variable references are not first-class values in Icon; they cannot be assigned to variables. However, they do appear in the definition of Icon as arguments to assignments and as the subject of dereferencing. For example, the semantics of the expression
s[3] := s

can be described in terms of a substring trapped variable and a simple variable reference. For this reason, it is necessary to include these references in the type system of the implementation language. variable consists of all variable references. It contains five distinguished subtypes. tvsubs contains all substring trapped variables. tvtbl contains all

3##

table-element trapped variables. kywdint contains &random and &trace. kywdpos contains &pos. kywdsubj contains &subject.

Including C Code

As noted above, C declarations can be included in a declare clause. Embedded C code may reference these declarations as well as declarations global to the operation. Executable C code can be included using one of two actions.
detail-code ::= body { extended-C } | inline { extended-C }

body and inline are similar to each other, except that inline indicates code that is reasonable for the compiler to put in-line when it can. body indicates that for the in-line version of the operation, this piece of C code should be put in a separate function in the link library and the body action should be replaced by a call to that function. Any parameters or variables from the declare clause needed by the function must be passed as arguments to the function. Therefore, it is more efficient to declare variables needed by a body action within that body than within the declare. However, the scope of these local variables is limited to the body action. Most Icon keywords provide examples of operations that should be generated in-line. In the following example, nulldesc is a global variable of type descriptor. It is defined in the include files automatically included by rtt.
&null - the null value. keyword{1} null abstract { return null } inline { return nulldesc; } end

Error Reporting
runerr( msg_number [ , descriptor ] ) [ ; ]

runerr is translated into a call to the run-time error handling routine. Specifying this as a separate action rather than a C expression within a body or inline action gives the compiler additional information about the behavior of the operation. msg_number is the number used to look up the error message in a run-time error table. If a descriptor is given, it is taken to be the offending value.

Abstract Type Computations
abstract { type-computations }

The behavior of an operation with respect to types is a simplification of the full semantics of the operation. For example, the semantics of the function image is to produce the string representing its operand; its behavior in the type realm is described as simply returning some string. In general, a good simplification of an operation is too complicated to be automatically produced from the operation's implementation (of course, it is always possible to conclude that an operation can produce any type and can have any side effect,

4&&

but that is hardly useful). For this reason, the programmer must use the abstract action to specify type-computations.
type-computations ::= { store [ type ] = type [ ; ] } [ return type [ ; ] ]

type-computations consist of side effects and a statement of the result type of the operation. There must be exactly one return type along any path from the start of the operation to C code containing a return, suspend, or fail. A side effect is represented as an assignment to the store. The store is analogous to program memory. Program memory is made up of locations containing values. The store is made up of locations containing types. A type represents a set of values, though only certain such sets correspond to types for the purpose of abstract type computations. Types may be basic types such as all Icon integers, or they may be composite types such as all Icon integers combined with all Icon strings. The rules for specifying types are given below. A location in the store may correspond to one location in program memory, or it may correspond to several or even an unbounded number of locations in program memory. The contents of a location in the store can be thought of as a conservative (that is, possibly overestimated) summary of values that might appear in the corresponding location(s) in program memory at run time. Program memory can be accessed through a pointer. Similarly, the store can be indexed by a pointer type, using an expression of the form store[ type ], to get at a given location. An Icon global variable has a location in program memory, and a reference to such a variable in an Icon program is treated as a pointer to that location. Similarly, an Icon global variable has a location in the store and, during type inferencing, a reference to the variable is interpreted as a pointer type indexing that location in the store. Because types can be composite, indexing into the store with a pointer type may actually index several locations. Suppose we have the following side effect
store[ type1 ] = type2

Suppose that, during type inferencing, type1 evaluates to a composite pointer type consisting of the pointer types for several global variables; then all corresponding locations in the store will be updated. If the above side effect is coded in the assignment operator, this situation might result from an Icon expression such as
every (x | y) := &null

In this example, it is obvious that both variables are changed to the null type. However, type inferencing can only deduce that at least one variable in the set is changed. Thus, it must assume that each could either be changed or left as is. It is only when the left hand side of the side effect represents a unique program variable that type inferencing knows that the variable cannot be left as is. In the current implementation of type inferencing, assignment to a single named variable is the only side effect where type inferencing recognizes that the side effect will definitely occur. Indexing into the store with a non-pointer type corresponds to assigning to a non-variable. Such an assignment results in error termination. Type inferencing ignores any non-pointer components in the index type; they represent execution paths that don't continue and thus contribute nothing to the types of expressions. A type in an abstract type computation is of the form
type ::= type-name | type ( variable ) | attrb-ref |

4&1

new type-name ( type { , type } ) | store [ type ] | type ++ type | type ** type | ( type )

The type(variable) expression allows type computations to be expressed in terms of the type of an argument to an operation. This must be an unmodified argument. That is, the abstract type computation involving this expression must not be within the scope of a conversion. This restriction simplifies the computations needed to perform type inferencing. This expression is useful in several contexts, including operations that deal with structure types. The type system for a program may have several sub-types for a structure type. The structure types are list, table, set, record, substring trapped variable, and table-element trapped variable. Each of these Icon types is a composite type within the type computations, rather than a basic type. Thus the type inferencing system may be able to determine a more accurate type for an argument than can be expressed with a type-name. For example, it is more accurate to use
if is:list(x) then abstract { return type(x) } actions else runerr(108, x)

than it is to use
if is:list(x) then abstract { return list } actions else runerr(108, x)

Structure values have internal "structure". Structure types also need an internal structure that summarizes the structure of the values they contain. This structure is implemented with type attributes. These attributes are referenced using dot notation:
attrb-ref ::= type . attrb-name attrb-name ::= lst_elem | set_elem | key | tbl_elem | default | all_fields | str_var | trpd_tbl

Just as values internal to structure values are stored in program memory, types internal to structure types are kept in the store. An attribute is a pointer type referencing a location in the store. A list is made up of (unnamed) variables. The lst_elem attribute of a list type is a type representing all the variables contained in all the lists in the type. For example, part of the code for the bang operator is as follows, where dx is the dereferenced operand.

4&2

type_case dx of { list: { abstract { return type(dx).lst_elem } actions } ...

This code fragment indicates that, if the argument to bang is in a list type, bang returns some variable from some list in that type. In the type realm, bang returns a basic pointer type. The set_elem attribute of a set type is similar. The locations of a set never "escape" as variables. That is, it is not possible to assign to an element of a set. This is reflected in the fact that a set_elem is always used as the index to the store and is never assigned to another location or returned from an operation. The case in the code from bang for sets is
set: { abstract { return store[type(dx).set_elem] } actions }

Table types have three attributes. key references a location in the store containing the type of any possible key value in any table in the table type. tbl_elem references a location containing the type of any possible element in any table in the table type. default references a location containing the type of any possible default value for any table in the table type. Only tbl_elem corresponds to a variable in Icon. The others must appear as indexes into the store. Record types are implemented with a location in the store for each field, but these locations cannot be accessed separately in the type computations of the implementation language. These are only needed separately during record creation and field reference, which are handled as special cases in the compiler. Each record type does have one attribute, all_fields, available to type computations. It is a composite type and includes the pointer types for each of the fields. Substring trapped variables are implemented as structures. For this reason, they need structure types to describe them. The part of the structure of interest in type inferencing is the reference to the underlying variable. This is reflected in the one attribute of these types, str_var. It is a reference to a location in the store containing the pointer types of the underlying variables that are "trapped". str_var is only used as an index into the store; it is never exported from an operation. Similarly, table-element trapped variables need structure types to implement them. They have one attribute, trpd_tbl, referencing a location in the store containing the type of the underlying table. The key type is not kept separately in the trapped variable type; it must be immediately added to the table when a table-element trapped variable type is created. This pessimistically assumes that the key type will eventually be put in the table, but saves an attribute in the trapped variable for the key. trpd_tbl is only used as an index into the store; it is never exported from an operation.

4&3

The type computation, new, indicates that an invocation of the operation being implemented creates a new instance of a value in the specified structure type. For example, the implementation of the list function is
function{1} list(size, initial) abstract { return new list(type(initial)) } actions end

The type arguments to the new computation specify the initial values for the attributes of the structure. The table type is the only one that contains multiple attributes. (Note that record constructors are created during translation and are not specified via the implementation language.) Table attributes must be given in the order: key, tbl_elem, and default. In the type system for a given program, a structure type is partitioned into several sub-types (these sub-types are only distinguished during type inferencing, not at run time). One of these sub-types is allocated for every easily recognized use of an operation that creates a new value for the structure type. Thus, the following Icon program has two list sub-types: one for each invocation of list.
procedure main() local x x := list(1, list(100)) end

Two operations are available for combining types. Union is denoted by the operator `++' and intersection is denoted by the operator `**'. Intersection has the higher precedence. These operations interpret types as sets of values. However, because types may be infinite, these sets are treated symbolically.

C Extensions

The C code included using the declare, body, and inline actions may contain several constructs beyond those of standard C. There are five categories of C extensions: access to interface variables, declarations, type conversions/type checks, signaling run-time errors, and return statements. In addition to their use in the body of an operation, the conversions and checks, run-time error, and declaration extensions may be used in ordinary C functions that are put through the implementation language translator.

Interface Variables

Interface variables include parameters, the identifier for length of the variable part of an argument list, and the special variable result. Unconverted parameters, converted parameters with Icon types, and converted parameters with the internal types tmp_string and tmp_cset are descriptors and within the C code have the type struct descrip. Converted parameters with the internal type of C_integer have some signed integer type within the C code, but exactly which C integer type varies between systems. This type has been set up using a typedef in the automatically included include file so it is available for

4&4

use in declarations in C code. Converted parameters with the internal type of C_double have the type double within the C code. Converted parameters of the type C_string have the type char *. The length of the variable part of an argument list has the type int within the C code. result is a special descriptor variable. Under some circumstances it is more efficient to construct a return value in this descriptor than to use other methods. See Section 5 of the implementation language reference manual for details.

Declarations

The extension to declarations consists of a new storage class specifier, tended (register is an example of an existing storage class specifier). Understanding its use requires some knowledge of Icon storage management. Only a brief description of storage management is given here; see the Icon implementation book for further details. Icon values are represented by descriptors. A descriptor contains both type information and value information. For large values (everything other than integers and the null value) the descriptor only contains a pointer to the value, which resides elsewhere. When such a value is dynamically created, memory for it is allocated from one of several memory regions. Strings are allocated from the string region. All other relocatable values are allocated from the block region. The only non-relocatable values are co-expression stacks and co-expression activation blocks. On some systems non-relocatable values are allocated in the static region. On other systems there is no static region and these values are allocated using the C malloc function. When a storage request is made to a region and there is not enough room in that region, a garbage collection occurs. All reachable values for each region are located. Values in the string and block regions are moved into a contiguous area at the bottom of the region, creating (hopefully) free space at the end of the region. Unreachable co-expression stacks and activator blocks are "freed". The garbage collector must be able to recognize and save all values that might be referenced after the garbage collection and it must be able to find and update all pointers to the relocated values. Operation arguments that contain pointers into one of these regions can always be found by garbage collection. The implementations of many operations need other descriptors or pointers into memory regions. The tended storage class identifies those descriptors and pointers that may have live values when a garbage collection could occur (that is, when a memory allocation is performed). A descriptor is implemented as a C struct named descrip, so an example of a tended descriptor declaration is
tended struct descrip d;

Blocks are also implemented as C structs. The following list illustrates the types of block pointers that may be tended.
tended struct b_real *bp;
tended struct b_cset *bp;
tended struct b_file *bp;
tended struct b_proc *bp;
tended struct b_list *bp;
tended struct b_lelem *bp;
tended struct b_table *bp;
tended struct b_telem *bp;
tended struct b_set *bp;

4&5

tended struct b_selem *bp;
tended struct b_record *bp;
tended struct b_tvkywd *bp;
tended struct b_tvsubs *bp;
tended struct b_tvtbl *bp;
tended struct b_refresh *bp;
tended struct b_coexpr *cp;

Alternatively, a union pointer can be used to tend a pointer to any kind of block.
tended union block *bp;

Character pointers may also be tended. However, garbage collection needs a length associated with a pointer into the string region. Unlike values in the block region, the strings themselves do not have a length stored with them. Garbage collection treats a tended character pointer as a zero-length string. These character pointers are almost always pointers into some string, so garbage collection effectively treats them as zero-length substrings of the strings. The string as a whole must be tended by some descriptor so that it is preserved. The purpose of tending a character pointer is to ensure that the pointer is relocated with the string it points into. An example is
tended char *s1, *s2;

Tended arrays are not supported. tended may only be used with variables of local scope. tended and register are mutually exclusive. If no initial value is given, one is supplied that is consistent with garbage collection.

Type Conversions/Type Checks

Some conditional expressions have been added to C. These are based on type checks in the type specification part of the implementation language.
is: type-name ( source ) cnv: dest-type ( source , destination ) def: dest-type ( source , value , destination )

source must be an Icon value, that is, a descriptor. destination must be a variable whose type is consistent with the conversion. These type checks may appear anywhere a conditional expression is valid in a C program. Note that is, cnv, and def are reserved words to distinguish them from labels. The type_case statement may be used in extended C. This statement has the same form as the corresponding action, but in this context, C code replaces the actions in the type-select clauses.

Signaling Run-time Errors

runerr is used for signaling run-time errors. It acts like a function but may take either 1 or 2 arguments. The first argument is the error number. If the error has an associated value, the second argument is a descriptor containing that value.

Return Statements

There are three statements for leaving the execution of an operation. These are analogous to the corresponding expressions in the Icon language.

4&

ret-statements ::= return ret-value ; | suspend ret-value ; | fail ; ret-value ::= descriptor | C_integer expression | C_double expression | C_string expression | descript-constructor

descriptor is an expression of type struct descrip. For example


{ tended struct descrip dp; ... suspend dp; ... }

Use of C_integer, C_double, or C_string to prefix an expression indicates that the expression evaluates to the indicated C type and not to a descriptor. When necessary, a descriptor is constructed from the result of the expression, but when possible the Icon compiler produces code that can use the raw C value (see Section 5 of the implementation language reference manual). As an example, the integer case in the divide operation is simply
inline { return C_integer x / y; }

Note that a returned C string must not be in a local (dynamic) character array; it must have a global lifetime. A descript-constructor is an expression that explicitly converts a pointer into a descriptor. It is only valid in a return statement, because it builds the descriptor in the implicit location of the return value.
descript-constructor ::= string ( length , char-ptr ) | cset ( block-ptr ) | real ( block-ptr ) | file ( block-ptr ) | procedure ( block-ptr ) | list ( block-ptr ) | set ( block-ptr ) | record ( block-ptr ) | table ( block-ptr ) | co_expression ( stack-ptr ) | tvtbl ( block-ptr ) | named_var ( descr-ptr ) | struct_var ( descr-ptr , block-ptr ) | substr ( descr-ptr , start , len ) | kywdint ( descr-ptr ) | kywdpos ( descr-ptr ) | kywdsubj ( descr-ptr )

The arguments to string are the length of the string and the pointer to the start of the string. block-ptrs are pointers to blocks of the corresponding types. stack-ptr is a pointer to a co-expression stack. descr-ptr is a pointer to a descriptor. named_var is used to create a reference to a variable (descriptor) that is not in a block. struct_var is used to create a

4&!

reference to a variable that is in a block. The Icon garbage collector works in terms of whole blocks. It cannot preserve just a single variable in the block, so the descriptor referencing a variable must contain enough information for the garbage collector to find the start of the block. That is what the block-ptr is for. substr creates a substring trapped variable for the given descriptor, starting point within the string, and length. kywdint, kywdpos, and kywdsubj create references to keyword variables. Note that returning either C_double expression or substr(descr-ptr, start, len) may trigger a garbage collection.

4&"

GNU Free Documentation License


Version 1.2, November 2002 Copyright (C) 2000,2001,2002 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. 0. PREAMBLE The purpose of this License is to make a manual, textbook, or other functional and useful document "free" in the sense of freedom: to assure everyone the effective freedom to copy and redistribute it, with or without modifying it, either commercially or noncommercially. Secondarily, this License preserves for the author and publisher a way to get credit for their work, while not being considered responsible for modifications made by others. This License is a kind of "copyleft", which means that derivative works of the document must themselves be free in the same sense. It complements the GNU General Public License, which is a copyleft license designed for free software. We have designed this License in order to use it for manuals for free software, because free software needs free documentation: a free program should come with manuals providing the same freedoms that the software does. But this License is not limited to software manuals; it can be used for any textual work, regardless of subject matter or whether it is published as a printed book. We recommend this License principally for works whose purpose is instruction or reference. 1. APPLICABILITY AND DEFINITIONS This License applies to any manual or other work, in any medium, that contains a notice placed by the copyright holder saying it can be distributed under the terms of this License. Such a notice grants a world-wide, royalty-free license, unlimited in duration, to use that work under the conditions stated herein. The "Document", below, refers to any such manual or work. Any member of the public is a licensee, and is addressed as "you". You accept the license if you copy, modify or distribute the work in a way requiring permission under
copyright law. A "Modified Version" of the Document means any work containing the Document or a portion of it, either copied verbatim, or with modifications and/or translated into another language. A "Secondary Section" is a named appendix or a front-matter section of the Document that deals exclusively with the relationship of the publishers or authors of the Document to the Document's overall subject (or to related matters) and contains nothing that could fall directly within that overall subject. (Thus, if the Document is in part a textbook of mathematics, a Secondary Section may not explain any mathematics.) The relationship could be a matter of historical connection with the subject or with related matters, or of legal, commercial, philosophical, ethical or political position regarding them. The "Invariant Sections" are certain Secondary Sections whose titles are designated, as being those of Invariant Sections, in the notice that says that the Document is released under this License. If a section does not fit the above definition of Secondary then it is not allowed to be designated as Invariant. The Document may contain zero Invariant Sections. If the Document does not identify any Invariant Sections then there are none. The "Cover Texts" are certain short passages of text that are listed, as Front-Cover Texts or Back-Cover Texts, in the notice that says that the Document is released under this License. A Front-Cover Text may be at most 5 words, and a Back-Cover Text may be at most 25 words. A "Transparent" copy of the Document means a machine-readable copy, represented in a format whose specification is available to the general public, that is suitable for revising the document straightforwardly with generic text editors or (for images composed of pixels) generic paint programs or (for drawings) some widely available drawing editor, and that is suitable for input to text formatters or for automatic translation to a variety of formats suitable for input to text
formatters. A copy made in an otherwise Transparent file format whose markup, or absence of markup, has been arranged to thwart or discourage subsequent modification by readers is not Transparent. An image format is not Transparent if used for any substantial amount of text. A copy that is not "Transparent" is called "Opaque". Examples of suitable formats for Transparent copies include plain ASCII without markup, Texinfo input format, LaTeX input format, SGML or XML using a publicly available DTD, and standard-conforming simple HTML, PostScript or PDF designed for human modification. Examples of transparent image formats include PNG, XCF and JPG. Opaque formats include proprietary formats that can be read and edited only by proprietary word processors, SGML or XML for which the DTD and/or processing tools are not generally available, and the machine-generated HTML, PostScript or PDF produced by some word processors for output purposes only. The "Title Page" means, for a printed book, the title page itself, plus such following pages as are needed to hold, legibly, the material this License requires to appear in the title page. For works in formats which do not have any title page as such, "Title Page" means the text near the most prominent appearance of the work's title, preceding the beginning of the body of the text. A section "Entitled XYZ" means a named subunit of the Document whose title either is precisely XYZ or contains XYZ in parentheses following text that translates XYZ in another language. (Here XYZ stands for a specific section name mentioned below, such as "Acknowledgements", "Dedications", "Endorsements", or "History".) To "Preserve the Title" of such a section when you modify the Document means that it remains a section "Entitled XYZ" according to this definition. The Document may include Warranty Disclaimers next to the notice which states that this License applies to the Document. These Warranty Disclaimers are considered to be included by reference
in this License, but only as regards disclaiming warranties: any other implication that these Warranty Disclaimers may have is void and has no effect on the meaning of this License. 2. VERBATIM COPYING You may copy and distribute the Document in any medium, either commercially or noncommercially, provided that this License, the copyright notices, and the license notice saying this License applies to the Document are reproduced in all copies, and that you add no other conditions whatsoever to those of this License. You may not use technical measures to obstruct or control the reading or further copying of the copies you make or distribute. However, you may accept compensation in exchange for copies. If you distribute a large enough number of copies you must also follow the conditions in section 3. You may also lend copies, under the same conditions stated above, and you may publicly display copies. 3. COPYING IN QUANTITY If you publish printed copies (or copies in media that commonly have printed covers) of the Document, numbering more than 100, and the Document's license notice requires Cover Texts, you must enclose the copies in covers that carry, clearly and legibly, all these Cover Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on the back cover. Both covers must also clearly and legibly identify you as the publisher of these copies. The front cover must present the full title with all words of the title equally prominent and visible. You may add other material on the covers in addition. Copying with changes limited to the covers, as long as they preserve the title of the Document and satisfy these conditions, can be treated as verbatim copying in other respects. If the required texts for either cover are too voluminous to fit legibly, you should put the first ones listed (as many as fit reasonably) on the actual cover, and continue the rest onto adjacent pages. If you publish or distribute Opaque copies of the Document numbering more than 100, you must either
include a machine-readable Transparent copy along with each Opaque copy, or state in or with each Opaque copy a computer-network location from which the general network-using public has access to download using public-standard network protocols a complete Transparent copy of the Document, free of added material. If you use the latter option, you must take reasonably prudent steps, when you begin distribution of Opaque copies in quantity, to ensure that this Transparent copy will remain thus accessible at the stated location until at least one year after the last time you distribute an Opaque copy (directly or through your agents or retailers) of that edition to the public. It is requested, but not required, that you contact the authors of the Document well before redistributing any large number of copies, to give them a chance to provide you with an updated version of the Document. 4. MODIFICATIONS You may copy and distribute a Modified Version of the Document under the conditions of sections 2 and 3 above, provided that you release the Modified Version under precisely this License, with the Modified Version filling the role of the Document, thus licensing distribution and modification of the Modified Version to whoever possesses a copy of it. In addition, you must do these things in the Modified Version:

4&#
A. Use in the Title Page (and on the covers, if any) a title distinct from that of the Document, and from those of previous versions (which should, if there were any, be listed in the History section of the Document). You may use the same title as a previous version if the original publisher of that version gives permission. B. List on the Title Page, as authors, one or more persons or entities responsible for authorship of the modifications in the Modified Version, together with at least five of the principal authors of the Document (all of its principal authors, if it has fewer than five), unless they release you from this requirement. C. State on the Title page the name of the publisher of the Modified Version, as the publisher. D. Preserve all the copyright notices of the Document. E. Add an appropriate copyright notice for your modifications adjacent to the other copyright notices. F. Include, immediately after the copyright notices, a license notice giving the public permission to use the Modified Version under the terms of this License, in the form shown in the Addendum below. G. Preserve in that license notice the full lists of Invariant Sections and required Cover Texts given in the Document's license notice. H. Include an unaltered copy of this License. I. Preserve the section Entitled "History", Preserve its Title, and add to it an item stating at least the title, year, new authors, and publisher of the Modified Version as given on the Title Page. If there is no section Entitled "History" in the Document, create one stating the title, year, authors, and publisher of the Document as given on its Title Page, then add an item describing the Modified Version as stated in the previous sentence. J. Preserve the network location, if any, given in the Document for public access to a Transparent copy of the Document, and likewise the network locations given in the Document for previous versions it was based on. These may be placed in the "History" section. You may omit a
network location for a work that was published at least four years before the Document itself, or if the original publisher of the version it refers to gives permission. K. For any section Entitled "Acknowledgements" or "Dedications", Preserve the Title of the section, and preserve in the section all the substance and tone of each of the contributor acknowledgements and/or dedications given therein. L. Preserve all the Invariant Sections of the Document, unaltered in their text and in their titles. Section numbers or the equivalent are not considered part of the section titles. M. Delete any section Entitled "Endorsements". Such a section may not be included in the Modified Version. N. Do not retitle any existing section to be Entitled "Endorsements" or to conflict in title with any Invariant Section. O. Preserve any Warranty Disclaimers. If the Modified Version includes new front-matter sections or appendices that qualify as Secondary Sections and contain no material copied from the Document, you may at your option designate some or all of these sections as invariant. To do this, add their titles to the list of Invariant Sections in the Modified Version's license notice. These titles must be distinct from any other section titles. You may add a section Entitled "Endorsements", provided it contains nothing but endorsements of your Modified Version by various parties--for example, statements of peer review or that the text has been approved by an organization as the authoritative definition of a standard. You may add a passage of up to five words as a Front-Cover Text, and a passage of up to 25 words as a Back-Cover Text, to the end of the list of Cover Texts in the Modified Version. Only one passage of Front-Cover Text and one of Back-Cover Text may be added by (or through arrangements made by) any one entity. If the Document already includes a cover text for the same cover, previously added by you or by arrangement made by the same entity you are acting on behalf of, you
may not add another; but you may replace the old one, on explicit permission from the previous publisher that added the old one. The author(s) and publisher(s) of the Document do not by this License give permission to use their names for publicity for or to assert or imply endorsement of any Modified Version. 5. COMBINING DOCUMENTS You may combine the Document with other documents released under this License, under the terms defined in section 4 above for modified versions, provided that you include in the combination all of the Invariant Sections of all of the original documents, unmodified, and list them all as Invariant Sections of your combined work in its license notice, and that you preserve all their Warranty Disclaimers. The combined work need only contain one copy of this License, and multiple identical Invariant Sections may be replaced with a single copy. If there are multiple Invariant Sections with the same name but different contents, make the title of each such section unique by adding at the end of it, in parentheses, the name of the original author or publisher of that section if known, or else a unique number. Make the same adjustment to the section titles in the list of Invariant Sections in the license notice of the combined work. In the combination, you must combine any sections Entitled "History" in the various original documents, forming one section Entitled "History"; likewise combine any sections Entitled "Acknowledgements", and any sections Entitled "Dedications". You must delete all sections Entitled "Endorsements." 6. COLLECTIONS OF DOCUMENTS You may make a collection consisting of the Document and other documents released under this License, and replace the individual copies of this License in the various documents with a single copy that is included in the collection, provided that you follow the rules of this License for verbatim copying of each of the documents in all other respects. You may extract a single document from such a collection, and
distribute it individually under this License, provided you insert a copy of this License into the extracted document, and follow this License in all other respects regarding verbatim copying of that document. 7. AGGREGATION WITH INDEPENDENT WORKS A compilation of the Document or its derivatives with other separate and independent documents or works, in or on a volume of a storage or distribution medium, is called an "aggregate" if the copyright resulting from the compilation is not used to limit the legal rights of the compilation's users beyond what the individual works permit. When the Document is included in an aggregate, this License does not apply to the other works in the aggregate which are not themselves derivative works of the Document. If the Cover Text requirement of section 3 is applicable to these copies of the Document, then if the Document is less than one half of the entire aggregate, the Document's Cover Text may be placed on covers that bracket the Document within the aggregate, or the electronic equivalent of covers if the Document is in electronic form. Otherwise they must appear on printed covers that bracket the whole aggregate. 8. TRANSLATION Translation is considered a kind of modification, so you may distribute translations of the Document under the terms of section 4. Replacing Invariant Sections with translations requires special permission from their copyright holders, but you may include translations of some or all Invariant Sections in addition to the original versions of these Invariant Sections. You may include a translation of this License, and all the license notices in the Document, and any Warranty Disclaimers, provided that you also include the original English version of this License and the original versions of those notices and disclaimers. In case of a disagreement between the translation and the original version of this License or a notice or disclaimer, the original version will prevail. If a section in the Document is Entitled
"Acknowledgements", "Dedications", or "History", the requirement (section 4) to Preserve its Title (section 1) will typically require changing the actual title. 9. TERMINATION You may not copy, modify, sublicense, or distribute the Document except as expressly provided for under this License. Any other attempt to copy, modify, sublicense or distribute the Document is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 10. FUTURE REVISIONS OF THIS LICENSE The Free Software Foundation may publish new, revised versions of the GNU Free Documentation License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. See http://www.gnu.org/copyleft/. Each version of the License is given a distinguishing version number. If the Document specifies that a particular numbered version of this License "or any later version" applies to it, you have the option of following the terms and conditions either of that specified version or of any later version that has been published (not as a draft) by the Free Software Foundation. If the Document does not specify a version number of this License, you may choose any version ever published (not as a draft) by the Free Software Foundation.

41&

411

References
[ASU86] Aho, Alfred, Sethi, Ravi, and Ullman, Jeffrey. Compilers, Principles Techniques and Tools. Addison-Wesley, 1986. [Foley82] Foley, J.D., and A. Van Dam. Fundamentals of Interactive Computer Graphics. Reading, MA: Addison-Wesley Publishing Company, 1982. [Griswold96] Griswold, Ralph E. and Griswold, Madge T. The Icon Programming Language, Third Edition. San Jose, CA: Peer-To-Peer Communications, 1996. [Griswold98] Griswold, Ralph E., Jeffery, Clinton L., and Townsend, Gregg M. Graphics Programming in Icon. San Jose, CA: Peer-To-Peer Communications, 1998. [Griswold71] Griswold, Poage, and Polonsky. The SNOBOL 4 Programming Language, 2nd ed. Englewood Cliffs, N.J. Prentice-Hall, Inc. 1971. [Jeffery99] Clinton L. Jeffery. Program Monitoring and Visualization: An Exploratory Approach. Springer-Verlag, New York, NY. 1999. [Jeffery04] Jeffery, Clinton, Mohamed, Shamim, Pereda, Ray, and Parlett, Robert. Programming with Unicon. Draft manuscript from http://unicon.org [LeH91] Arnaud LeHors. The X PixMap Format. Groupe Bull, Koala Project, INRIA, France, 1991. [Nye88] Adrian Nye, editor. Xlib Reference Manual. O'Reilly & Associates, Inc., Sebastopol, California, 1988. [OpenGL99] OpenGL Architecture Review Board, Woo, Mason, Neider, Jackie, Davis, Tom, Shreiner, Dave. OpenGL Programming Guide: the Official Guide to Learning OpenGL, Third Edition. Reading, MA: Addison-Wesley Publishing Company, 1999. [OpenGL00] OpenGL Architecture Review Board, Shreiner, Dave. OpenGL Programming Guide: the Official Reference Document to OpenGL, Third Edition. Upper Saddle Reading, MA: Addison-Wesley Publishing Company, 2000. [TGJ96] Gregg M. Townsend, Ralph E. Griswold, and Clinton L. Jeffery. Configuring the Source Code for Version 9 of Icon, Technical Report IPD238c, Department of Computer Science, University of Arizona, April 1996. http://www.cs.arizona.edu/icon/docs/ipd238.htm. [TGJ98] Gregg M. Townsend, Ralph E. Griswold, and Clinton L. Jeffery. Installing Version 9 of Icon on UNIX
Platforms, Technical Report IPD243e, Department of Computer Science, University of Arizona, February 1998. http://www.cs.arizona.edu/icon/docs/ipd243.htm. [Uhl88] Stephen A. Uhler. MGR --- C Language Application Interface. Technical report, Bell Communications Research, July 1988. [Wal94] Kenneth Walker. The Run-Time Implementation Language for Icon, http://www.cs.arizona.edu/icon/ftp/doc/ipd261.pdf. Technical Report IPD261, Department of Computer Science, University of Arizona, June 1994.

412

[Walker94] Walker, Kenneth, The Run-Time Implementation Language for Icon. Technical Report from http://www.cs.arizona.edu/icon/ [Rees 86] Jonathan Rees, William Clinger, et al. Revised Report on the Algorithmic Language Scheme. SIGPLAN Notices, 21:12, December 1986. [Bartlett 89] J. Bartlett. SCHEME->C a Portable Scheme-to-C Compiler. Research Report 89/1. DEC Western Research Laboratory, January 1989. [Yuasa] T. Yuasa and M. Hagiya. Kyoto Common Lisp Report. Research Institute for Mathematical Sciences, Kyoto University [SR] Gregory R. Andrews, Ronald A. Olsson et al. An Overview of the SR Language and Implementation. TOPLAS 10:1, January 1988, pp. 51-86. [Weiner] J.L. Weiner and S. Ramakrishnan. A Piggy-back Compiler for Prolog. Proceeding of the 1988 Conference on Programming Language Design and Implementation, SIGPLAN Notices 23:7, July 1988, pp. 288-295. [Stroustrup 86] B. Stroustrup. The C++ Programming Language. Addison-Wesley, 1986. [peephole] Andrew S. Tanenbaum, Hans van Staveren, and Johan W. Stevenson. Using Peephole Optimization on Intermediate Code. TOPLAS 4:1, January 1982. [Wulf] William A. Wulf, Richard K. Johnsson, Charles B. Weinstock, Steven O. Hobbs, Charles M. Geschke. The Design of an Optimizing Compiler. American Elsevier Pub. Co., New York, 1975. [denote] M. J. C. Gordon. The Denotational Description of Programming Languages, An Introduction. Springer, 1979. [Stoy] J. E. Stoy. Denotational Semantics: The Scott-Strachey Approach to Programming Language Theory. MIT Press, Cambridge, 1977. [ansi-c] American National Standard for Information Systems. Programming Language C, ANSI X3.159-1989. American National Standards Institute, New York, 1990. [Prabhala] Bhaskaram Prabhala and Ravi Sethi. Efficient Computation of Expressions with Common Subexpressions. Fifth Annual ACM Symposium on Principles of Programming Languages, pp. 222-230, January 1978. [Nilsson] Jørgen Fischer Nilsson. On the Compilation of a Domain-Based Prolog. Information
Processing, Richard Edward Allison Mason ed., North-Holland, 1983, pp. 293-299. [Martinek] John Martinek and Kelvin Nilsen. Code Generation for the Temporary-Variable Icon Virtual Machine. Technical Report 89-9, Department of Computer Science, Iowa State University, December 1989. [pntstr] David R. Chase, Mark Wegman, and F. Kenneth Zadeck. Analysis of Pointers and Structures. Proceeding of the 1990 Conference on Programming Language Design and Implementation, SIGPLAN Notices 25:6, June 1990, pp. 296-310. [depptr] Susan Horwitz, Phil Pfeiffer, and Thomas Reps. Dependence Analysis for Pointer Variables. Proceeding of the 1989 Conference on Programming Language Design and Implementation, SIGPLAN Notices 24:7, July 1989, pp. 28-40.

413

[smltlk type] Norihisa Suzuki. Inferring Types in Smalltalk. Eighth Annual ACM Symposium on Principles of Programming Languages, pp. 187-199, January 1981. [Milner] Robin Milner. A Theory of Type Polymorphism in Programming. Journal of Computer and System Sciences. 17:3, December 1978, pp. 348-375. [unify] J. A. Robinson, A Machine-Oriented Logic Based on the Resolution Principle. JACM, 12:1, January 1965, pp. 23-41. [ianl1] Ralph E. Griswold and Madge T. Griswold. The Icon Analyst 1, August 1990. [johnk] John Kececioglu. Private Communication. November 1990. [debray apr91] Saumya K. Debray. Private Communication. April 1991. [wam] D. H. D. Warren. An Abstract Prolog Instruction Set. Technical Note 309, SRI International, Menlo Park, CA, October 1983.

414

415

Index

You might also like