//////////////////////////////////////////////////////////////////////////////
//
// Header-buffer offsets defined below apply to a packet received on a
// port whose STUFF bit value is 1.
//
//////////////////////////////////////////////////////////////////////////////

// Facet coprocessor register / bit-field definitions
#include "cop_defs.asm"

// IQ2000 module definitions and packet I/O descriptor defines
#include "iq2000.h"

#define LOGENABLE       0       // logging switch: 0 = disabled, 1 = enabled
#define LOGENABLE2      0

// Program origin
#define PGM_ORG         0x100

// Context PC registers
#define r6CTXPC0        r6
#define r9CTXPC1        r9
#define r10CTXPC2       r10
#define r11CTXPC3       r11

// Status register
#define r12STATUS       r12

// Offset from the Header Buffer Base
#define ICM             0       // PIM Input Completion Message word

// Offsets from the Header Address
#define PIDU            0       // PIM Input Descriptor, upper word
#define PIDL            4       // PIM Input Descriptor, lower word
#define PODU            (-8)    // POM Output Descriptor, upper word
#define PODM0           (-4)    // POM Output Descriptor, middle word 0
#define PODM1           0       // POM Output Descriptor, middle word 1
#define PODL            4       // POM Output Descriptor, lower word
#define RNGHDR          8       // ring header
#define IPDA            40      // IP destination address (IP packets)
#define ARPDA           48      // IP destination address (ARP packets)
#define ETYPE           22      // Ethernet Type field
#define CNTRL           12      // control message information

// The definitions below should not be needed
#define DMACU           8       // Dest MAC addr upper word starting point
#define DMACL           12      // Dest MAC addr lower word starting point
#define SMACU           16      // Src MAC addr upper word starting point
#define SMACL           20      // Src MAC addr lower word starting point

//////////////////////////////////////////////////////////////////////////////
//
// Bit positions within the PIM Input descriptor and the POM Output
// descriptor
//
//////////////////////////////////////////////////////////////////////////////
#define LHBIT           31      // PIM Input descriptor: long-packet bit
#define PHBIT           30      // PIM Input descriptor: payload-packet bit
#define CSBIT           29      // PIM Input descriptor: checksum bit
#define XHBIT           28      // PIM Input descriptor: extended header bit
#define MBIT            27      // PIM Input descriptor: user bit
                                // (also the DA MAC match bit in the octal mac)
#define ERBIT           07      // PIM Input descriptor: error bit

//////////////////////////////////////////////////////////////////////////////
//
// Coprocessor 3 registers
//
//////////////////////////////////////////////////////////////////////////////
#define ASNHDR          R0      // CP3 GR0
#define CMPHDR          R1      // CP3 GR1
#define ORDHDR          R2      // CP3 GR2
#define CTUHDR          R3      // CP3 GR3
#define CTSHDR          R4      // CP3 GR4
#define ASNHBAD         R8      // CP3 GR8
#define CMPHBAD         R9      // CP3 GR9
#define ORDHBAD         R10     // CP3 GR10
#define CTUHBAD         R11     // CP3 GR11
#define CTSHBAD         R12     // CP3 GR12

//////////////////////////////////////////////////////////////////////////////
// Register assignments shared by user and kernel contexts
//////////////////////////////////////////////////////////////////////////////
#define r1ZERO2         r1      // r0/r1 together form dword 0
#define r3ICM           r3      // ICM word
#define r4HD1           r4      // Header descriptor bytes 4-7
#define r5HD0           r5      // Header descriptor bytes 0-3
#define r6SBBASE        r6      // Smart buffer base address
#define r7QADDR         r7      // SBUF destination queue address
#define r8AUXQ          r8      // SBUF aux queue address
#define r9MASK0         r9      // PIM->POM descriptor mask, bytes 0-3
#define r10BUFOUT       r10     // BUFOUT address
#define r11HOSTQ        r11     // SBUF host queue address
#define r12SEMA0        r12     // MIPS semaphore 0 base addr
#define r13SEMA1        r13     // MIPS semaphore 1 base addr
#define r14MODE         r14     // echo/fwd mode (0=echo, 1=fwd, etc)
#define r15TLEN         r15     // Type/Len field
#define r16TBLADR       r16     // Address of forwarding table
#define r17PIDSRCP      r17     // PIMID:SRCPORT
#define r18TMP          r18     // Scratch storage
#define r19TMP          r19     // Scratch storage
#define r20DROPCNT      r20     // Count of packets dropped in user ctx
                                // Count of packets dropped in kernel ctx
#define r30HDAD         r30     // Header address

// Register assignments used by the kernel context only
//
#define r21EPC          r21     // Exception address
#define r22CAUSE        r22     // Exception cause
#define r23DCS          r23     // DMA Control/Status
#define r24PHYSTAT      r24     // BIU PHY Status
#define r25DMA_SYS0     r25     // DMA queue entry 0 system address
#define r26DMA_LOC0     r26     // DMA queue entry 0 local addr+flags
#define r27MID1         r27     // MID deallocate word bytes 4-7
#define r28MID0         r28     // MID deallocate word bytes 0-3
#define r29HD2          r29     // Header descriptor bytes 8-11
#define r31RETURN       r31     // return address/link register

//////////////////////////////////////////////////////////////////////////////
//
// Register offset definitions
//
//////////////////////////////////////////////////////////////////////////////
#define BUFOUT          24      // Offset from the order manager base address
                                // of the BUF_OUT reg, used to release header
                                // buffers back to the order manager

//////////////////////////////////////////////////////////////////////////////
//
// Predefined memory locations
//
//////////////////////////////////////////////////////////////////////////////
#define SBBASE          0x1000  // Base address of the smart buffer,
                                // loaded by the MIPS - used at init time
#define SBWEST          0x3000  // base for pom west: 0x1000 for B, 0x3000 for D
#define OMBASE          0x1004  // OMRG_BASE = base addr of OM registers,
                                // loaded by the MIPS - used at init time
#define PKTRCVD         0x1080  // count of packets received
#define DISCARD         0x0200  // Offset to SB discard queue (queue # 65)
#define SMBUFB          0x1000  // offset from sb a base to sb b base
#define POMBASE         0x1400  // upper part of the base address to the poms
#define DSNADDR         0x4010  // offset from zero to the OM destination port
#define POMAOFF         0x8000  // offset from zero to pom a base
#define POMBOFF         0xa000  // offset from zero to pom b base
#define POMWEST         0xe000  // offset from zero to pom west (d)
#define CFG_BASE        0x1100
#define CNTRS_LOC       0x1180  // landing area when counters are loaded for local use
#define ZERO_CNTRS      0x11c0  // zero counters in memory
#define CNTR_TBL_BASE   0x1500
#define FWD_TBL_BASE    0x0400  // counter table base supplies the upper half:
                                // fwd_tbl_base = 0x15000400
#define CTRL_CNT_BASE   0x0500  // control counter base = 0x15000500
#define CNTRS_SIZE      0x0040  // number of bytes in dma for counter:
                                // 4*n transit counters + 4*n transmit counters

// Capacity is currently only 16 bits because an 8 bit rate is used.
// The upper capacity half becomes important with a switch to 10 bits.
//#define CAPACITYL 0x3fc0      //* max = 16320 (rtt = 1.2 ms)
//#define CAPACITYDIV3L 0x1540  //* max = 16320 (rtt = 1.2 ms)
//#define LINKDELAY 0x20        //* link delay for realtime counter for max = 16320
//#define CAPACITYL 0x7f80      //* max = 32640
//#define CAPACITYDIV3L 0x2a80  //* max = 32640
#define CAPACITYU       0x0007  // capacity, upper half-word
#define CAPACITYL       0xf800  // capacity, lower half-word; max = 522,240 (rtt = 9.6 ms)
#define CAPACITYDIV3U   0x0002
#define CAPACITYDIV3L   0xa800  // max = 522,240 (rtt = 9.6 ms)
#define LINKDELAY       0x20a3
#define MAXRATE         0xFF    // 255 * 2^8 is the max rate

// unused
//#define CAPACITYDIV5 0x14     //* FIX ME testing with capacity/5 = "20"

#define SMPRTDIS        0x1084  // count of same port discards
#define ERRPKT          0x1088  // count of error packet discards
#define HOSTRING        0x108C  // count of packets host to ring
#define RINGTRANS       0x1090  // count of packets ring to ring
#define RINGHOST        0x1094  // count of packets ring to host
#define CTRLPKTS        0x1098  // count of control packets
#define REALTIME        0x10A0  // Real time counter locations (3 words through 0x10AB)
#define HBADDR          0x10AC  // HB Log pointer
#define GENLOCAL        0x10B0  // local address for control pkt generation
#define STTIMELOC       0x10B4  // local track of when to start time
#define RTCNT           0x10B8  // count of the number of times the control
                                // packet goes around the ring
#define RINGCNT         0x10BC  // number of nodes left before rtt complete
#define COUNTERSUM      0x1300

//////////////////////////////////////////////////////////////////////////////
//
// Preassigned registers
//
//////////////////////////////////////////////////////////////////////////////
#define r27SBB          R27     // Smart Buffer Base Address
#define r26OMB          R26     // Order Manager Base Address

//////////////////////////////////////////////////////////////////////////////
//
// Register usage
//
//////////////////////////////////////////////////////////////////////////////
#define r1HAD           R1      // Header Address
#define r2HBB           R2      // Header Buffer Base Address
                                // (used to find the ICM)
#define r4LID           R4      // Lower Input Descriptor
#define r5UID           R5      // Upper Input Descriptor
#define r6LOD           R6      // Lower Output Descriptor
#define r7UOD           R7      // Upper Output Descriptor
#define r8DML           R8      // Hi order 2 bytes of the DA MAC with 0's fill
#define r9DMH           R9      // Low order word (4 bytes) of the DA MAC
#define r11_CONFIGTABLE R11     // config table entry for current pkt
#define r12_RINGHEADER  R12     // ring header for current pkt
#define r15_MYRINGADDR  R15     // ring address for current node
#define r16_DESTRINGADDR R16    // ring destination address for current pkt
#define r18_ICML        R18     // input completion message
#define r19_ICMU        R19     // input completion message
#define r20DNPRT        R20     // destination port value for order managing
#define r16TIMER        r16     // Real time counter for this file
#define r17REQ          r17     // Request time for this file
#define r17_K           R17     // k in max/min algorithm
#define r18_FAIRSHARE   R18     // fair share in max/min algorithm
#define r19_RUNNINGCAP  R19     // running capacity in max/min alg
#define SRRESY          R17     // SRRESY register
#define LURESY          R1      // LURESY register

//----------------------------------------------------------------------------
// Two styles of macro. The first uses the "C-preprocessor" for textual
// substitution, the second uses the actual macro facility in the assembler.
// Note the differences in the syntax.
//----------------------------------------------------------------------------

//----------------------------------------------------------------------------
// GetPkt(r, t)
//
// Readable wrapper for MFC3: moves coprocessor register 't' into
// register 'r'. The move is followed by a NOP.
//
//-----------------------------------------------------------------------------
#define GetPkt(r,t)             \
        MFC3    r,t             ;\
        NOP

//*******************************************************
//*
//* Shift1 .. Shift8: register shift helpers for the sort.
//*
//* Each macro saves its last argument (z) in r13, then moves the
//* first argument into z, every following argument into the one
//* before it, and finally the saved value into y.
//* r13 is overwritten by every one of these macros.
//*
//*******************************************************
#define Shift1(y,z)                     \
        or      r13, r0, z              ; \
        or      z, r0, y                ; \
        or      y, r0, r13

#define Shift2(x,y,z)                   \
        or      r13, r0, z              ; \
        or      z, r0, x                ; \
        or      x, r0, y                ; \
        or      y, r0, r13

#define Shift3(w,x,y,z)                 \
        or      r13, r0, z              ; \
        or      z, r0, w                ; \
        or      w, r0, x                ; \
        or      x, r0, y                ; \
        or      y, r0, r13

#define Shift4(v,w,x,y,z)               \
        or      r13, r0, z              ; \
        or      z, r0, v                ; \
        or      v, r0, w                ; \
        or      w, r0, x                ; \
        or      x, r0, y                ; \
        or      y, r0, r13

#define Shift5(u,v,w,x,y,z)             \
        or      r13, r0, z              ; \
        or      z, r0, u                ; \
        or      u, r0, v                ; \
        or      v, r0, w                ; \
        or      w, r0, x                ; \
        or      x, r0, y                ; \
        or      y, r0, r13

#define Shift6(t,u,v,w,x,y,z)           \
        or      r13, r0, z              ; \
        or      z, r0, t                ; \
        or      t, r0, u                ; \
        or      u, r0, v                ; \
        or      v, r0, w                ; \
        or      w, r0, x                ; \
        or      x, r0, y                ; \
        or      y, r0, r13

#define Shift7(s,t,u,v,w,x,y,z)         \
        or      r13, r0, z              ; \
        or      z, r0, s                ; \
        or      s, r0, t                ; \
        or      t, r0, u                ; \
        or      u, r0, v                ; \
        or      v, r0, w                ; \
        or      w, r0, x                ; \
        or      x, r0, y                ; \
        or      y, r0, r13

#define Shift8(r,s,t,u,v,w,x,y,z)       \
        or      r13, r0, z              ; \
        or      z, r0, r                ; \
        or      r, r0, s                ; \
        or      s, r0, t                ; \
        or      t, r0, u                ; \
        or      u, r0, v                ; \
        or      v, r0, w                ; \
        or      w, r0, x                ; \
        or      x, r0, y                ; \
        or      y, r0, r13

//----------------------------------------------------------------------------
// IncrementCounter
//
// Increments a counter in the DataStore at address 'ctr'.
// Steps on r19 (r19 is the scratch register for the read-modify-write).
//
//-----------------------------------------------------------------------------
.macro IncrementCounter ctr
        LW      r19, \ctr(r0)           # Get the current counter value
        NOP                             # Cannot reference it yet
        ADDI    r19, r19, 1             # Increment it
        SW      r19, \ctr(r0)           # Write it back
.endm

//----------------------------------------------------------------------------
// LoadW(r, w)
//
// Loads the 32-bit constant 'w' into register 'r' with a lui/ori pair.
//
// 'w' is parenthesized inside the macro so that an expression argument
// (for example a header constant defined as A|B or A+B without its own
// parentheses) is split as a whole value rather than having >> and &
// bind to its last term only. The upper half is also masked to 16 bits
// so the lui immediate is always in range.
//----------------------------------------------------------------------------
#define LoadW(r,w)                      \
        lui     r,((w)>>16)&0xFFFF      ;\
        ori     r,r,(w)&0xFFFF

//////////////////////////////////////////////////////////////////////////////
//
// Layer 2 switch code
//
//////////////////////////////////////////////////////////////////////////////

//
// Setup cold start vector, re-start vector and exception vector
//
        .org    0x0000
        .global _start
_start:
K_Boot:
        j       K_Start
        nop

        .org    0x0040
K_BEV:
        j       K_Boot
        mtc0    r0, STATUS              // executes in the jump delay slot

        .org    0x0080
K_Exception:
// include exception handler for Timer interrupts
#include "timerint.asm"

//
// Kernel startup code
//
// Inputs: r14MODE register contains forwarding mode
//
K_Start:
        // enable mailbox interrupt for re-starting code
        ori     r1, r0, (MBX_IS | MBX_IE)
        ctc0    r1, BIUINT

        // initialize processor status (COP-use, interrupt enables, KUp/IEp)
        LoadW(r18TMP, STATUS_INIT)      // Enable all cops, Set KUp,IEp,IEc
        mtc0    r18TMP, STATUS
        nop
        nop
        nop

        // Enable DMA interrupt and halt-on-error, clear pending status bits
        ori     r18TMP, r0, DCS_INIT
        mtc3    r18TMP, DCS

        // enable all contexts & initialize AGE register
        ori     r18TMP, r0, 0x1B        // Set AGEREG to 0=highest, 3=lowest
        ctc0    r18TMP, AGEREG
        ctc0    r0, CTX0SB              // Enable context 0
        ctc0    r0, CTX1SB              // Enable context 1
        ctc0    r0, CTX2SB              // Enable context 2
        ctc0    r0, CTX3SB              // Enable context 3
        ori     r18TMP, r0, CTX0        // Load start addr for ctx 0-3
        mtc0    r18TMP, CTXPC0
        mtc0    r18TMP, CTXPC1
        mtc0    r18TMP, CTXPC2
        mtc0    r18TMP, CTXPC3

        // Init kernel registers
        or      r20DROPCNT, r0, r0      // Clear pkt drop/abort count
        ori     r1ZERO2, r0, 0          // Form dword pair with zero value (R0:R1)
        LoadW(r6SBBASE, SBA_BASE)       // Load up SB base for DMA abort checking
        cfc0    r19, BIUINT             // Get BIUINT Register
        nop
        ori     r19, r19, 0x8           // Enable Timer3 interrupt
        ctc0    r19, BIUINT             // Write it back

        // Go startup the contexts
        jcr     r0                      // Start context 0
        rfe

//
// User Context Entry Point
//
// - initialize fixed register values for each context
//   (all four contexts share this entry point)
//
CTX0:
CTX1:
CTX2:
CTX3:
        LoadW(r11HOSTQ, (SBA_BASE+(HOST_FWD_QUEUE*8)))
        LoadW(r8AUXQ, (SBA_BASE+(HOST_AUX_QUEUE*8)))
        LoadW(r10BUFOUT, BUF_OUT)
        LoadW(r12SEMA0, (SEMA_BASE+(0*8)))
        LoadW(r13SEMA1, (SEMA_BASE+(1*8)))
        LoadW(r16TBLADR, PKT_FWD_TBL)

        // clear number of aborted packets dropped
        or      r20DROPCNT, r0, r0

        // define mask for converting PID(word0) to POD(word0)
        LoadW(r9MASK0, 0xC7FFFF7F)
        ori     r1ZERO2, r0, 0          // Form dword pair with zero value (R0:R1)

        // Setup and initialization
        lw      r27SBB, SBBASE(r0)      // Initialize the base address
                                        // register for the smart buffer
        lw      r26OMB, OMBASE(r0)      // Initialize the base address
                                        // register for the order manager
        ori     r25, r0, GETHDR         // Start-of-loop in R25

GETHDR:
        // Get header buffer pointer
        GetPkt(r1HAD,CMPHBAD)           // Get UNASSIGNED CTS Hdr Buf Pointer
        ldw     r4LID, PIDU(r1HAD)      // Get the Input Descriptor into r5UID/r4LID
        nop

        // Check first for aux channel pkt (from port # 15)
        bbi     r4LID(B_SRCPORT_AUX),AuxPkt     // Branch if port # > 7
        or      r7QADDR, r0, r8AUXQ     // delay slot: default to AUX queue

        //**********************
        //* CONTROL PACKET GENERATION BEGIN
        //*
        //**********************
        lw      r13, GENLOCAL(r0)       //* local address for generation
        nop
        beq     r13, r0, PKTGENERATEDONE
        lui     r30, CNTR_TBL_BASE      //* delay slot: base of the sram
        srrdl   r30
        cfc2    r14, SRRESY
        nop
        sw      r0, GENLOCAL(r0)        //* clear local bit.
        bne     r14, r0, GENPKT
        nop
        luulck  r30                     //* unlock
        j       PKTGENERATEDONE
GENPKT:
        nop                             // delay slot of the jump above; also the
                                        // first instruction on the GENPKT path
        srwru   r30, r0                 //* write it back to zero and unlock so
                                        //* no one else generates
        ori     r30, r0, CNTRS_LOC      //* landing for control packet generation
        ori     r14, r0, 36             //* header length
        sw      r14, 0(r30)             //* store POD0
        sw      r0, 4(r30)              //* POD1
        sw      r0, 8(r30)              //* POD2
        sw      r0, 12(r30)             //* POD3
        LoadW(r14, 0x08082000)
        sw      r14, 16(r30)            //* ring header
        ori     r14, r0, MAXRATE
        sw      r14, 20(r30)
        sw      r14, 24(r30)            //* initialize control message v(t) to MAXRATE.
// Remaining control-message words: each is set to MAXRATE (still in r14).
        sw      r14, 28(r30)
        sw      r14, 32(r30)
        sw      r14, 36(r30)
        sw      r14, 40(r30)
        sw      r14, 44(r30)
        sw      r14, 48(r30)
        ori     r14, r27SBB, 0          // put the packet on queue A0
        wxr30   r14, 0, 7               // send the packet
                                        // NOTE(review): the earlier comment said
                                        // "5 word 64s" but the operand is 7 - confirm
        // ori r14, r27SBB, SBWEST      //* put the packet on queue D0
        //FIXME (?) commented out east ring control pkt
        // wxr30 r14, 0, 7
        trapqne
        //**********************
        //* CONTROL PACKET GENERATION END
        //*
        //**********************

PKTGENERATEDONE:
        // Assemble the destination-port value in r20DNPRT from the header
        // buffer number and the PPE id.
        ram     r11, r1HAD, 7,28,0      // buffer number for dstn_port value
        mfc0    r12, REVID
        nop                             // ppe id for dstn_port value
        ram     r12, r12, (REVID_PPENUM_POS-4),26,4
        or      r20DNPRT, r12, r11      // combine the two fields

        // Increment packet count
        jal     SaveHdrBfr              // Save the packet in memory
        nop
        IncrementCounter PKTRCVD        // global packet count; not ours, but
                                        // it may be useful

        // Parse the PIM Input descriptor.
        // The source port and PIM id together index the configuration table.
        ram     r11, r4LID, 30,26,2     // src port info
        ram     r12, r4LID, 0,25,5      // PIM id
        or      r11, r11, r12           // pimid in bits 5,6; src port in bits 2,3,4
        lw      r11_CONFIGTABLE, CFG_BASE(r11)  // table base + index
        nop                             // r11 now holds the config table entry
        bbin    r11_CONFIGTABLE(31), HOST_PKT   // packet from host: go to host
                                                // packet processing
        nop

        //**********************
        //* RING_PKT -- falling through to ring packet processing
        //*
        //**********************
        lw      r12_RINGHEADER, RNGHDR(r1HAD)           // load the ring header
        ram     r15_MYRINGADDR, r11_CONFIGTABLE, 0,24,0 // my own ring address
                                        // (could also be ram r15,r4LID,0,28,0)
        ram     r13, r12_RINGHEADER, 24,24,0    // ring source address of this packet
        beq     r13, r15_MYRINGADDR, STRIP_PKT  // I am the source: strip the packet
        // otherwise fall through and test for a control packet
        ram     r13, r12_RINGHEADER, 12,28,0    // delay slot: packet type
        xori    r14, r13, 2                     // control is type 2
        beq     r14, r0, CNTRL_PKT              // control: go to control packet
                                                // processing
        xori    r14, r13, 1                     // delay slot: normal is type 1
        bne     r14, r0, ERROR_ILLEGAL_TYPE     // neither type: CRASH!

        // Not a control packet, so destination_port is written with a 0 value;
        // r20DNPRT already holds the complete value to store.
        ori     r30, r0, CNTRS_LOC      // delay slot: landing for writing dest-port
        sw      r20DNPRT, 0(r30)
        orui    r13, r0, POMBASE        // base address, upper half
        ori     r13, r13, DSNADDR       // destport address, lower half
        wbr30   r13, 0, 4
        trapqne

        // regular packet: compare its ring destination with this node
        ram     r13, r12_RINGHEADER, 16,24,0    // ring destination address
        bne     r13, r15_MYRINGADDR, NOT_MY_DEST // not for us - forward it on
        nop

        //***********************
        //* MY_DEST
        //* fall through to MY_DEST when there is a ring packet
        //* destined for the current node.
        //*
        //***********************
        // getting here means that the packet checks out OK and
        // that we should now build the POM output descriptor
        bbin    r5UID(PHBIT), EMITPKT0  // does pkt have payload?
        ori     r6LOD, r0, 0            // no -- r6LOD for HEADER packet
        bbi     r5UID(LHBIT), LONGPKT0  // is pkt LONG?
// Delay slot of the LONG-packet test: the header buffer base is computed on
// both paths because LONGPKT0 needs it to find the ICM.
        andoi   r2HBB, r1HAD, 0xFF80    // Header Buffer Base
        j       EMITPKT0                // no -- it's COMPLETE
        ram     r6LOD, r4LID, 0, 9, 16  // delay slot: r6LOD for COMPLETE packet

LONGPKT0:
        lw      r18, ICM(r2HBB)         // load ICM
        nop                             // wait for ICM load to complete
        ram     r6LOD, r18, 0, 0, 16    // r6LOD for LONG packet

EMITPKT0:
        ori     r6LOD, r6LOD, 0x3000    // 6 byte offset in the lower output
                                        // descriptor: skips the ring header
                                        // and the stuff bytes
        addi    r7UOD, r5UID, -6        // r7UOD = r5UID with the length reduced,
                                        // because the ring header is removed
        sdw     r6LOD, PODU(r1HAD)      // first word64 of the output descriptor
        add     r7UOD, r0, r0           // Clear the first word32
        add     r6LOD, r0, r0           // Clear the second word32
        sdw     r6LOD, PODM1(r1HAD)     // second word64 of the output descriptor
        BBI     r5UID(ERBIT), ERROR     // Branch if any packet error

        // The output descriptor is now complete in the header buffer, so the
        // packet can be sent to the smart buffer. This is done with a DMA to
        // the smart buffer base address plus the port number, left shifted 6.
        ram     r13, r11_CONFIGTABLE, 13,23,3   // delay slot: host output queue
                                                // from config table, times eight
        addi    r13, r13, 0x2000        // offset to reach the POMC queues
                                        // (if the output POM were unknown we'd
                                        // look in bits 23-22 of the config table
                                        // entry; not-my-packet shows an example)
        IncrementCounter RINGHOST
        jal     SaveHdrBfr              // Save the packet in memory
        nop
        add     r13, r13, r27SBB        // add the smart buffer base address;
                                        // r13 is now the address of the queue
                                        // this packet should be written into
// End of the local-delivery path: hand the packet to the queue in r13.
        addi    r1HAD, r1HAD, -8        // point the Header Address at the Output
                                        // Descriptor instead of the Input Descriptor
        pkrlr1  r13, 0, 15              // Send packet
        nop
        jr      r25                     // Return to get next packet
        nop
        //*******************
        //* end MY_DEST code
        //*
        //*******************

        //*****************************
NOT_MY_DEST:
        // Counter table offset for a transit packet.
        //   N = current node addr
        //   C = direction
        //   S = source ring addr
        // counter index is CNNN0SSS00
        ram     r13, r15_MYRINGADDR, 26,23,6    // my ring address times 64
                                                // (NNN lands in bits 8-6)
        ram     r14, r11_CONFIGTABLE, 21,22,9   // east/west direction times 512
        or      r13, r13, r14                   // combine
        ram     r14, r12_RINGHEADER, 22,26,2    // ring source address times 4
        add     r13, r13, r14                   // combine
        orui    r13, r13, CNTR_TBL_BASE         // r13 = address of the counter
                                                // for this exact packet
        ram     r17, r13, 0,0,6                 // find 64 byte boundary
        lulck   r17                             // lock 64 byte boundary
        cfc2    r20, LURESY                     // result is not used afterwards
        srrd    r13                             // read counter and lock.

        // Total packet size (header length + payload length) is built in r16
        // and later added to the counter.
        bbin    r5UID(PHBIT), COUNT1    // no payload: count header only
        ram     r16, r5UID, 0,25,0      // delay slot: header length from the
                                        // upper word of the input descriptor
        // only packets with a payload get here
        bbi     r5UID(LHBIT), COUNT_LONGPKT1    // is pkt LONG?
        andoi   r2HBB, r1HAD, 0xFF80    // delay slot: Header Buffer Base,
                                        // needed to find the ICM
        // only COMPLETE packets get here
        ram     r21, r4LID, 16,24,0     // payload length from the lower word
                                        // of the input descriptor
        j       COUNT1
        addu    r16, r16, r21           // delay slot: total = header + payload

COUNT_LONGPKT1:
        ldw     r18_ICML, ICM(r2HBB)    // load ICM
        nop                             // wait for ICM load to complete
                                        // ICM lower word is in r18
                                        // ICM upper word is in r19
        ram     r21, r19_ICMU, 16,16,0  // payload length from the input
                                        // completion message
        add     r16, r16, r21           // total = header + payload

COUNT1:
        cfc2    r14, SRRESY             // counter value returned by the read
        nop
        add     r14, r14, r16           // update counter
        srwr    r13, r14                // write the counter back and unlock.
        luulck  r17                     // unlock 64 byte boundary

        // Same descriptor build as the local-delivery path, to release the pkt
        bbin    r5UID(PHBIT), EMITPKT1  // does pkt have payload?
        ori     r6LOD, r0, 0            // no -- r6LOD for HEADER packet
        bbi     r5UID(LHBIT), LONGPKT1  // is pkt LONG?
        andoi   r2HBB, r1HAD, 0xFF80    // Header Buffer Base, to find the ICM
        j       EMITPKT1                // no -- it's COMPLETE
        ram     r6LOD, r4LID, 0, 9, 16  // delay slot: r6LOD for COMPLETE packet

LONGPKT1:
        //* ********
        //* old code follows, kept in the case that the new code is incorrect.
//* //* lw r18, ICM(r2HBB) // load ICM //* nop // wait for ICM load to complete //* ram r6LOD, r18, 0, 0, 16 // r6LOD for LONG packet //* //* ******** ram r6LOD, r19_ICMU, 0, 0, 16 //* r6LOD for LONG packet EMITPKT1: ori r6LOD, r6LOD, 0x000 //* set 0 byte offset in order to keep the ring header //* and the stuff bytes in the // lower output descriptor addi r7UOD, r5UID, 0 // Copy r5UID into r7UOD //* don't change the length because we need the ring header sdw r6LOD, PODU(r1HAD) // Save the first word64 into // the output descriptor add r7UOD, r0, r0 // Clear the first word32 add r6LOD, r0, r0 // Clear the second word32 sdw r6LOD, PODM1(r1HAD) // Save the second word64 into // the output descriptor BBI r5UID(ERBIT), ERROR // Branch if any packet error // Now the output descriptor is complete in the header buffer // and it is time to send the packet to the smart buffer. // This is accomplished by performing a DMA to the smart // buffer ram r14, r11_CONFIGTABLE, 14,30,0 //* get output POM from config table beq r14, r0, SENDRINGPKT //* if the POM bits are zero, put the pkt on poma, whose //* base address is already stored ram r13, r11_CONFIGTABLE, 5,23,3 //* get ring output queue from config table times 8 //* fall through to POMB Packet processing addi r13, r13, SBWEST //* add 0x1000 to the poma base address in order to reach the pomb base SENDRINGPKT: IncrementCounter RINGTRANS jal SaveHdrBfr // Save the packet in memory nop add r13, r13, r27SBB // add the smart buffer base // address. R13 now contains // the address of the queue // this packet should be written // into. 
addi r1HAD, r1HAD, -8 // Adjust the Header Address to point to the Output // Descriptor instead of the Input Descriptor pkrlr1 r13, 0, 15 // Send packet //* 15 word 64s is the size of the DMA transfer, //* probably always large enough nop jr r25 // Return to get next packet nop //***************************** //* END TRANSIT PACKET PROCESSING //* //***************************** //***************************** //* END RING PACKET PROCESSING //* //***************************** //************************* //* BEGIN HOST/TRANSMIT PACKET PROCESSING //* //************************* HOST_PKT: //* not a control packet, therefore write destination_port with 0 value ori r30, r0, CNTRS_LOC //* landing for writing dest-port sw r20DNPRT, 0(r30) //* bc it's not control, //* we already have the complete value for storage. orui r13, r0, POMBASE //* get base address upper ori r13, r13, DSNADDR //* get destport address lower wbr30 r13, 0, 4 trapqne lh r13, ETYPE(r1HAD) //* Get the Ethernet Type ori r14, r0, 0x806 //* ARP Ethernet Type bne r13, r14, NormPkt //* Check for an ARP nop j GotNormal lw r13, ARPDA(r1HAD) //* get ip dest address for ARP Packets nop //* eliminate Assembler error message NormPkt: lw r13, IPDA(r1HAD) //* get ip dest address for IP Packets GotNormal: nop ram r13, r13, 30,26,2 //* put lower four bits such that we can index the fwd table orui r13, r13, CNTR_TBL_BASE //* upper half of forward table base addiu r13, r13, FWD_TBL_BASE //* lower half srrd r13 //* no need to lock bc we are only reading forwarding table. cfc2 r14, SRRESY nop //* (12/31) for now assume the destination address is in the lower //* eight bits of this forwarding table entry. ram r16_DESTRINGADDR, r14, 0,24,0 //* get the ring dest address for this packet ram r15_MYRINGADDR, r4LID, 0,28,0 //* get my own ring address. //* remember: current node address is not stored //* in configuration table for host packets. //* it is only stored as the port //* on which the packet arrived. 
// A new packet cannot be destined for the current node.
        beq     r15_MYRINGADDR, r16_DESTRINGADDR, ERROR_HOST_PKT

        // Build the ring header:
        //   1. source is current node address
        //   2. destination has been determined from forwarding table
        //   3. type = 1 (normal packet)
        ram     r12_RINGHEADER, r15_MYRINGADDR, 8,0,24  // delay slot: my ring
                                        // address into the source position,
                                        // bits 31-24 of the header
        ram     r13, r16_DESTRINGADDR, 16,8,16  // destination ring address into
                                        // the destination position, bits 23-16
        add     r12_RINGHEADER, r12_RINGHEADER, r13     // combine
        ori     r12_RINGHEADER, r12_RINGHEADER, 0x1000  // packet type 1 in bits
                                        // 12-15 (literally sets bit 12);
                                        // ring header is now complete

        // Append the header
        sw      r12_RINGHEADER, 4(r1HAD)        // ring header goes at r1HAD+4
        addi    r1HAD, r1HAD, -8        // move the header address pointer back 8
                                        // to make room for the output descriptor
        addi    r5UID, r5UID, 6         // header length + 6 so the ring header and
                                        // stuff bytes are transmitted; adding to
                                        // the UID works because the header length
                                        // is in its low seven bits

        // Count the packet.
        //   N = current node addr
        //   D = destination addr
        // counter index is at CNNN1DDD00
        ram     r13, r15_MYRINGADDR, 26,17,6    // my ring address times 64
                                        // NOTE(review): the transit path uses
                                        // maskl=23 for this field; 17 keeps more
                                        // bits - confirm which is intended
        ram     r14, r16_DESTRINGADDR, 30,26,2  // ring destination address times 4
        bbvn    r11_CONFIGTABLE, r16_DESTRINGADDR, CNT_WEST     // bit set means
                                                                // eastbound
        or      r13, r13, r14           // delay slot: combine
        // eastbound only: set the direction bit
        addi    r13, r13, 0x200         // set bit 9
CNT_WEST:
        addi    r13, r13, 0x20          // set bit 5
        orui    r13, r13, CNTR_TBL_BASE // r13 = address of the counter for
                                        // this exact packet
        ram     r17, r13, 0,0,6         // find 64 byte boundary
        lulck   r17                     // lock 64 byte boundary
        cfc2    r20, LURESY             // Result is discarded. It must not land
                                        // in r16: r16_DESTRINGADDR is still read
                                        // further down to pick the output
                                        // direction and queue. r20 is free here
                                        // (r20DNPRT was already stored at
                                        // HOST_PKT) and is what the transit path
                                        // uses for the same purpose.
        nop
        srrd    r13                     // read counter and lock.

        // Total packet size (header length + payload length) is built in r21
        // and later added to the counter.
        bbin    r5UID(PHBIT), COUNT2    // no payload: count header only
        ram     r21, r5UID, 0,25,0      // delay slot: header length from the
                                        // upper word of the input descriptor
        // only packets with a payload get here
        bbi     r5UID(LHBIT), COUNT_LONGPKT2    // is pkt LONG?
        andoi   r2HBB, r1HAD, 0xFF80    // delay slot: Header Buffer Base,
                                        // needed to find the ICM
        // only COMPLETE packets get here
        ram     r19, r4LID, 16,24,0     // payload length from the lower word
                                        // of the input descriptor
        j       COUNT2
        add     r21, r21, r19           // delay slot: total = header + payload

COUNT_LONGPKT2:
        ldw     r18_ICML, ICM(r2HBB)    // load ICM
        nop                             // wait for ICM load to complete
                                        // ICM lower word is in r18
                                        // ICM upper word is in r19
        ram     r20, r19_ICMU, 16,16,0  // payload length from the ICM upper word
        add     r21, r21, r20           // total = header + payload

COUNT2:
        cfc2    r14, SRRESY             // counter value returned by the read
        nop
        add     r14, r14, r21           // update counter
        srwr    r13, r14                // write the counter back and unlock.
        luulck  r17                     // unlock 64 byte boundary

        // Send packet
        bbin    r5UID(PHBIT), EMITPKT2  // does pkt have payload?
        ori     r6LOD, r0, 0            // no -- r6LOD for HEADER packet
        bbi     r5UID(LHBIT), LONGPKT2  // is pkt LONG?
        andoi   r2HBB, r1HAD, 0xFF80    // Header Buffer Base, to find the ICM
        j       EMITPKT2                // no -- it's COMPLETE
        ram     r6LOD, r4LID, 0, 9, 16  // delay slot: r6LOD for COMPLETE packet

LONGPKT2:
        //* ********
        //* old code follows, kept in the case that the new code is incorrect.
        //*
        //* lw r18, ICM(r2HBB)          // load ICM
        //* nop                         // wait for ICM load to complete
        //* ram r6LOD, r18, 0, 0, 16    // r6LOD for LONG packet
        //*
        //* ********
        //* FIX ME (1/10/02) check all ICM processing (r18/r19)
        //* to make sure that they're all correct.
// LONG packet: the lower output descriptor comes from the ICM upper word.
        ram     r6LOD, r19_ICMU, 0, 0, 16       // r6LOD for LONG packet

EMITPKT2:
        ori     r6LOD, r6LOD, 0x2000    // four byte offset, to skip to the
                                        // ring header
        addi    r7UOD, r5UID, 0         // r7UOD = r5UID, length unchanged
                                        // because the ring header was added
        sdw     r6LOD, PODU(r1HAD)      // first word64 of the output descriptor
        add     r7UOD, r0, r0           // Clear the first word32
        add     r6LOD, r0, r0           // Clear the second word32
        sdw     r6LOD, PODM1(r1HAD)     // second word64 of the output descriptor
        BBI     r5UID(ERBIT), ERROR     // Branch if any packet error
        nop                             // Delay slot. The direction branch below
                                        // used to sit here, which put a branch
                                        // inside a branch delay slot.

        // The output descriptor is now complete in the header buffer, so the
        // packet can be sent to the smart buffer. This is done with a DMA to
        // the smart buffer base address plus the port number, left shifted 6.

        // Output port and queue: the POM, port and queue come from a table
        // indexed by destination plus current node address.
        bbv     r11_CONFIGTABLE, r16_DESTRINGADDR, SEND_EAST    // bit set means
                                                                // eastbound
        nop                             // bit clear: westbound, fall through

        // SEND_WEST
        ram     r13, r11_CONFIGTABLE, 13,23,3   // west-destined packet: B port
                                        // info from bits 21-16 of the config
                                        // table entry
        ram     r14, r16_DESTRINGADDR, 26,23,6  // destination (queue #) times 64
        add     r13, r13, r14           // destination queue
        addi    r13, r13, SBWEST        // offset from the poma base to the west
                                        // POM base. r13 now holds the destination
                                        // address plus the base queue for its
                                        // port, all times eight, plus that base:
                                        // our specific output queue.
        j       SEND_HOST_PKT
        nop

SEND_EAST:
        ram     r13, r11_CONFIGTABLE, 5,23,3    // east-destined packet: A port
                                        // info from bits 13-8 of the config
                                        // table entry
        ram     r14, r16_DESTRINGADDR, 26,23,6  // destination (queue #) times 64
        add     r13, r13, r14           // add this to r13 to get destination queue.
//* (SEND_EAST, continued)
//* no need to add anything for base address-- we are already
//* using the poma base.
//* now r13 holds the destination address plus the base
//* queue for its specific port, all of this times eight.
//* this gives us our specific poma output queue.

//* SEND_HOST_PKT: common exit of the host-packet path.  Expects the queue
//* offset in r13; counts the packet, optionally logs the header buffer,
//* then DMAs the packet out and returns through r25.
SEND_HOST_PKT:
        IncrementCounter HOSTRING
        jal SaveHdrBfr                  // Save the packet in memory
        nop
        add r13, r13, r27SBB            // add the smart buffer base
                                        // address. R13 now contains
                                        // the address of the queue
                                        // this packet should be written
                                        // into.
        addi r1HAD, r1HAD, -8           // Adjust the Header Address to point to the Output
                                        // Descriptor instead of the Input Descriptor
        pkrlr1 r13, 0, 15               // Send packet
                                        //* 15 word 64s is the size of the DMA transfer,
                                        //* probably always large enough
        nop
        jr r25                          // Return to get next packet
        nop

//******************************
//*
//* END HOST (INJECTED) PACKET PROCESSING
//*
//******************************

//******************************
//*
//* BEGIN CONTROL PACKET PROCESSING
//*
//******************************

//* CNTRL_PKT: entry for a ring control packet.  Writes the destination-port
//* value, waits LINKDELAY timer ticks (sleeping in SLEEP0), then falls into
//* FWDCONTROL to compute this node's control-message entry.
CNTRL_PKT:
        //* a control packet, therefore write destination_port with 1 value
        ori r30, r0, CNTRS_LOC          //* landing for writing dest-port
        ori r20DNPRT, r20DNPRT, 0x100   //* bc it's control, we need to or in a different
                                        //* dest value-- 1
        sw r20DNPRT, 0(r30)
        orui r13, r0, POMBASE           //* get base address upper
        ori r13, r13, DSNADDR           //* get destport address lower
        wbr30 r13, 0, 4
        trapqne

        ldw r16TIMER, REALTIME(r0)      // Get the real time counter and request
        nop
        or r17REQ, r0, r16TIMER         // save current timer as base for request
        addi r17REQ, r17REQ, LINKDELAY  // add T div #nodes
        sw r17REQ, REALTIME(r0)         // Write request value back

        //* Sleep until the timer word at REALTIME+4 has reached the request
        //* value in r17REQ (loop while timer - request < 0).
SLEEP0:
        mtc3 r0, INITAGE                // make me the youngest context
        nop
        sleep
        nop
        lw r16TIMER, (REALTIME+4)(r0)
        nop
        sub r13, r16TIMER, r17REQ
        bltz r13, SLEEP0
        nop

        //* we have the following information:
        //* r12_RINGHEADER, r15_MYRINGADDR, r11_CONFIGTABLE

        //ram r13, r12_RINGHEADER, 16,24,0     //* get this packet's ring destination address
        //bne r13, r15_MYRINGADDR, FWDCONTROL  //* not for us-- we will do calculations and
                                                //* forward this packet on.
        //nop
        //* This code is currently unnecessary as we do our own control pkt generation. (2002/01/28)

        //* fall through if the packet has this node as destination: Discard it.
        //addi r13, r27SBB, DISCARD            // Discard queue address
        //addi r13, r13, 0x2000                //* add this offset to reach POMC queues
        //addi r1HAD, r1HAD, -8                // Adjust the Header Address to point to the Output
                                                // Descriptor instead of the Input Descriptor
        //pkrlr1 r11, 0, 15                    // Discard packet
        //nop
        //jr r25                               // Go process the next packet
        //nop

FWDCONTROL:
        //* **** CALCULATE CONTROL MESSAGE ENTRY ****

        //* retrieve this node's counters to CNTRS_LOC in local memory.
        bbi r11_CONFIGTABLE(30), GETCNTRS       //* if the packet came *from* the west,
                                                //* ie, it was on the east ring,
                                                //* we need the west ring counters,
                                                //* and vice-versa.
        ori r13, r0, 0                  //* WEST bit in the 9th position
                                        //* (executes on both paths: it directly follows the bbi)
        ori r13, r0, 0X200              //* EAST bit in the 9th position (fall-through path only)

GETCNTRS:
        // 1NNN000000
        ram r14, r15_MYRINGADDR, 26,23,6        //* my ring address in bits 8-6
        add r13, r13, r14               //* add them together
        orui r13, r13, CNTR_TBL_BASE    //* now we have the address of this set of counters

        // The following code is unnecessary because we are using the lurl
        // instruction which automatically does a 32 byte dma.
        //* NOTE(review): the two instructions below are nevertheless live code.
        orui r14, r0, CNTRS_SIZE        //* size of counters for dma
        ori r14, r14, CNTRS_LOC         //* local address to put counters

        //* Lock the counter set, copy it to CNTRS_LOC, overwrite it from
        //* ZERO_CNTRS, then unlock.
        lulck r13
        cfc2 r16, LURESY
        nop
        rb r14, r13
        orui r14, r0, CNTRS_SIZE        // for write, need size of dma
        ori r14, r14, ZERO_CNTRS
        wb r14, r13
        trapqne
        luulck r13

        lw r16TIMER, (REALTIME+4)(r0)   // Get the real time counter
        orui r13, r0, 0x002f            // upper half of the start-time comparison value
                                        // (original note: "0x204300 = ?")
//* Start-time gate, ring-time bookkeeping and transmit-counter summation for
//* the control-packet path.
//*
//* On entry r16TIMER holds the timer word and r13 the upper half of the
//* start-time threshold (loaded by the preceding orui).  On exit:
//*   r24      = sum of this node's eight transmit counters
//*   r16..r23 = the eight transit counters (loaded pairwise with ldw)
//*   r14      = CNTRS_LOC
        ori r13, r13, 0x4300            // low half: threshold = 0x002F4300
                                        // (original notes: 0x62000 = 2 sec, 0xf4300 = 5 sec;
                                        //  the value actually built matches neither)
        sub r13, r13, r16TIMER
        bgtz r13, SENDCONTROL           // jump over control processing
                                        // if we haven't reached starttime
        nop

        // RING TIME count and sim crash time
        ldw r16, RTCNT(r0)              // r16 = nodes left for one rtt, r17 = total ring times
        nop
        addi r16, r16, -1               //decrement
        sw r16, RINGCNT(r0)             // write it back to memory
                                        //* NOTE(review): loaded from RTCNT but stored to RINGCNT;
                                        //* confirm the two symbols name the same word.
        bne r16, r0, NOCRASH            // if the number of nodes left is > 0, skip following code
        nop

        addi r17, r17, 1                // increment total number of ring times
        addi r16, r16, 8                // reset ring node count
        sdw r16, RTCNT(r0)              // write them back to memory

        ori r16, r0, 0x1000             // stop after 0x1000 ring times
                                        // (original note: wait for 7600=0x1db0 or 10800=0x2a30)
        //orui r16, r16, 0xff
        sub r16, r16, r17
        bgez r16, NOCRASH
        nop
        //* Deliberate hang: spin forever once the ring-time limit is exceeded.
crash:  j crash
        nop

NOCRASH:
        //* add this node's transmit counters into one
        //* first offset is 32 because that's the offset to transmit counters
        //* then every two counters the offset increases by 8
        //* The adds are interleaved with the loads so that no add uses a
        //* register pair in the instruction right after its ldw.
        ori r14, r0, CNTRS_LOC          //* get local address again
        ldw r22, 32(r14)                //* get counters for 0 & 1
        nop
        ldw r20, 40(r14)                //* get counters for 2 & 3
        add r22, r22, r23               //* add counters 0 & 1
        ldw r18, 48(r14)                //* get counters for 4 & 5
        add r20, r20, r21               //* add counters 2 & 3
        ldw r16, 56(r14)                //* get counters for 6 & 7
        add r18, r18, r19               //* add counters 4 & 5
        add r22, r22, r20               //* sum of { 0 1 2 3 }
        add r16, r16, r17               //* add counters 6 & 7 (FIX: counter 7 in r17 was
                                        //* previously left out of the total)
        add r18, r18, r16               //* sum of { 4 5 6 7 }
        add r24, r18, r22               //* sum of all transmits

        //* load transit counters into registers.
        ldw r22, 24(r14)                //* get counters for 6 & 7
        ldw r20, 16(r14)                //* get counters for 4 & 5
        ldw r18, 8(r14)                 //* get counters for 2 & 3
        ldw r16, 0(r14)                 //* get counters for 0 & 1
        //* nop can be removed because next instruction does not depend on r16/r17.
//* Debug accumulation followed by the total-usage test.
//* Inputs: r16..r23 = transit counters, r24 = transmit total.
//* Output: r13 = (sum of r16..r24) - capacity; branches to NOSORT when
//* r13 <= 0, otherwise falls into SORT.

//DEBUG
        //* Only on the node whose ring address is 2: add the nine counters into
        //* the running sums stored at COUNTERSUM (offsets 0..32).  Uses r8/r9.
        ori r13, r0, 2
        bne r15_MYRINGADDR, r13, CSUM01
        nop
        ori r13, r0, COUNTERSUM
        ldw r8, 0(r13)
        nop
        add r8, r8, r16
        add r9, r9, r17
        sdw r8, 0(r13)
        ldw r8, 8(r13)
        nop
        add r8, r8, r18
        add r9, r9, r19
        sdw r8, 8(r13)
        ldw r8, 16(r13)
        nop
        add r8, r8, r20
        add r9, r9, r21
        sdw r8, 16(r13)
        ldw r8, 24(r13)
        nop
        add r8, r8, r22
        add r9, r9, r23
        sdw r8, 24(r13)
        lw r8, 32(r13)
        nop
        add r8, r8, r24
        sw r8, 32(r13)

CSUM01:
        //***************************************
        //**** BEGIN TOTAL USAGE CALCULATION ****
        //***************************************

        //* before sorting, make sure that the total is greater than capacity.
        or r13, r0, r24
        add r13, r13, r17
        add r13, r13, r18
        add r13, r13, r19
        add r13, r13, r20
        add r13, r13, r21
        add r13, r13, r22
        add r13, r13, r23
        add r13, r13, r16               //* add up all the counters.

        orui r14, r0, CAPACITYU         //* load upper capacity
        ori r14, r14, CAPACITYL         //* load lower capacity
        sub r13, r13, r14               //* subtract the capacity

//DEBUG
        //* Only on the node whose ring address is 2: accumulate the excess
        //* (r13) into the word at COUNTERSUM+36.
        ori r14, r0, 2
        bne r15_MYRINGADDR, r14, CSUM1
        nop
        ori r14, r0, COUNTERSUM
        lw r8, 36(r14)
        nop
        addu r8, r8, r13
        sw r8, 36(r14)

CSUM1:
        blez r13, NOSORT                //* if total count is less than or equal to capacity,
                                        //* we don't have to sort.
        nop

        //***************************************
        //**** END TOTAL USAGE CALCULATION   ****
        //* if total usage <= capacity, using no-sort max/min
        //* if total usage >  capacity, using sort max/min
        //***************************************

        //***************************************
        //**** BEGIN SORT                    ****
        //***************************************
        //* sort the transit counters and the sum
        //* of this node's transmit counters.
SORT:
        //* sorting registers r16 through r24 !!!
//* SORT body: insertion sort of r16..r24 into ascending order, working from
//* r23 down to r16.  Each stage compares the new register against the
//* already-sorted registers above it (signed, via sub/blez into r13) and
//* invokes a ShiftN macro to insert it.
//* NOTE(review): Shift1..Shift8 are defined outside this span; presumably
//* ShiftN(a, ...) moves a up N positions -- verify against the macro file.

        //* r23
        //* if r23 > r24
        sub r13, r23, r24
        blez r13, TESTR22
        nop
        Shift1(r23,r24)

TESTR22:
        //* r22
        //* if r22 > r23
        sub r13, r22, r23
        blez r13, TESTR21
        nop
        //* if r22 > r24
        sub r13, r22, r24
        blez r13, ELSE220
        nop
        Shift2(r22,r23,r24)
        j TESTR21
        nop
        //* else { r22 <= r24 }
ELSE220:
        Shift1(r22,r23)

TESTR21:
        //* r21
        //* if r21 > r22
        sub r13, r21, r22
        blez r13, TESTR20
        nop
        //* if r21 > r23
        sub r13, r21, r23
        blez r13, ELSE210
        nop
        //* if r21 > r24
        sub r13, r21, r24
        blez r13, ELSE211
        nop
        Shift3(r21,r22,r23,r24)
        j TESTR20
        nop
        //* else { r21 <= r24 }
ELSE211:
        Shift2(r21,r22,r23)
        j TESTR20
        nop
        //* else { r21 <= r23 }
ELSE210:
        Shift1(r21,r22)

TESTR20:
        //* r20
        //* if r20 > r21
        sub r13, r20, r21
        blez r13, TESTR19
        nop
        //* if r20 > r22
        sub r13, r20, r22
        blez r13, ELSE200
        nop
        //* if r20 > r23
        sub r13, r20, r23
        blez r13, ELSE201
        nop
        //* if r20 > r24
        sub r13, r20, r24
        blez r13, ELSE202
        nop
        Shift4(r20,r21,r22,r23,r24)
        j TESTR19
        nop
        //* else { r20 <= r24 }
ELSE202:
        Shift3(r20,r21,r22,r23)
        j TESTR19
        nop
        //* else { r20 <= r23 }
ELSE201:
        Shift2(r20,r21,r22)
        j TESTR19
        nop
        //* else { r20 <= r22 }
ELSE200:
        Shift1(r20,r21)

TESTR19:
        //* r19
        //* if r19 > r20
        sub r13, r19, r20
        blez r13, TESTR18
        nop
        //* if r19 > r21
        sub r13, r19, r21
        blez r13, ELSE190
        nop
        //* if r19 > r22
        sub r13, r19, r22
        blez r13, ELSE191
        nop
        //* if r19 > r23
        sub r13, r19, r23
        blez r13, ELSE192
        nop
        //* if r19 > r24
        sub r13, r19, r24
        blez r13, ELSE193
        nop
        Shift5(r19,r20,r21,r22,r23,r24)
        j TESTR18
        nop
        //* else { r19 <= r24 }
ELSE193:
        Shift4(r19,r20,r21,r22,r23)
        j TESTR18
        nop
        //* else { r19 <= r23 }
ELSE192:
        Shift3(r19,r20,r21,r22)
        j TESTR18
        nop
        //* else { r19 <= r22 }
ELSE191:
        Shift2(r19,r20,r21)
        j TESTR18
        nop
        //* else { r19 <= r21 }
ELSE190:
        Shift1(r19,r20)

TESTR18:
        //* r18
        //* if r18 > r19
        sub r13, r18, r19
        blez r13, TESTR17
        nop
        //* if r18 > r20
        sub r13, r18, r20
        blez r13, ELSE180
        nop
        //* if r18 > r21
        sub r13, r18, r21
        blez r13, ELSE181
        nop
        //* if r18 > r22
        sub r13, r18, r22
        blez r13, ELSE182
        nop
        //* if r18 > r23
        sub r13, r18, r23
        blez r13, ELSE183
        nop
        //* if r18 > r24
        sub r13, r18, r24
        blez r13, ELSE184
        nop
        Shift6(r18,r19,r20,r21,r22,r23,r24)
        j TESTR17
        nop
        //* else { r18 <= r24 }
ELSE184:
        Shift5(r18,r19,r20,r21,r22,r23)
        j TESTR17
        nop
        //* else { r18 <= r23 }
ELSE183:
        Shift4(r18,r19,r20,r21,r22)
        j TESTR17
        nop
        //* else { r18 <= r22 }
ELSE182:
        Shift3(r18,r19,r20,r21)
        j TESTR17
        nop
        //* else { r18 <= r21 }
ELSE181:
        Shift2(r18,r19,r20)
        j TESTR17
        nop
        //* else { r18 <= r20 }
ELSE180:
        Shift1(r18,r19)

TESTR17:
        //* r17
        //* if r17 > r18
        sub r13, r17, r18
        blez r13, TESTR16
        nop
        //* if r17 > r19
        sub r13, r17, r19
        blez r13, ELSE170
        nop
        //* if r17 > r20
        sub r13, r17, r20
        blez r13, ELSE171
        nop
        //* if r17 > r21
        sub r13, r17, r21
        blez r13, ELSE172
        nop
        //* if r17 > r22
        sub r13, r17, r22
        blez r13, ELSE173
        nop
        //* if r17 > r23
        sub r13, r17, r23
        blez r13, ELSE174
        nop
        //* if r17 > r24
        sub r13, r17, r24
        blez r13, ELSE175
        nop
        Shift7(r17,r18,r19,r20,r21,r22,r23,r24)
        j TESTR16
        nop
        //* else { r17 <= r24 }
ELSE175:
        Shift6(r17,r18,r19,r20,r21,r22,r23)
        j TESTR16
        nop
        //* else { r17 <= r23 }
ELSE174:
        Shift5(r17,r18,r19,r20,r21,r22)
        j TESTR16
        nop
        //* else { r17 <= r22 }
ELSE173:
        Shift4(r17,r18,r19,r20,r21)
        j TESTR16
        nop
        //* else { r17 <= r21 }
ELSE172:
        Shift3(r17,r18,r19,r20)
        j TESTR16
        nop
        //* else { r17 <= r20 }
ELSE171:
        Shift2(r17,r18,r19)
        j TESTR16
        nop
        //* else { r17 <= r19 }
ELSE170:
        Shift1(r17,r18)

TESTR16:
        //* r16
        //* if r16 > r17
        sub r13, r16, r17
        blez r13, AFTERSORT
        nop
        //* if r16 > r18
        sub r13, r16, r18
        blez r13, ELSE160
        nop
        //* if r16 > r19
        sub r13, r16, r19
        blez r13, ELSE161
        nop
        //* if r16 > r20
        sub r13, r16, r20
        blez r13, ELSE162
        nop
        //* if r16 > r21
        sub r13, r16, r21
        blez r13, ELSE163
        nop
        //* if r16 > r22
        sub r13, r16, r22
        blez r13, ELSE164
        nop
        //* if r16 > r23
        sub r13, r16, r23
        blez r13, ELSE165
        nop
        //* if r16 > r24
        sub r13, r16, r24
        blez r13, ELSE166
        nop
        Shift8(r16,r17,r18,r19,r20,r21,r22,r23,r24)
        j AFTERSORT
        nop
        //* else { r16 <= r24 }
ELSE166:
        Shift7(r16,r17,r18,r19,r20,r21,r22,r23)
        j AFTERSORT
        nop
        //* else { r16 <= r23 }
ELSE165:
        Shift6(r16,r17,r18,r19,r20,r21,r22)
        j AFTERSORT
        nop
        //* else { r16 <= r22 }
ELSE164:
        Shift5(r16,r17,r18,r19,r20,r21)
        j AFTERSORT
        nop
        //* else { r16 <= r21 }
ELSE163:
        Shift4(r16,r17,r18,r19,r20)
        j AFTERSORT
        nop
        //* else { r16 <= r20 }
ELSE162:
        Shift3(r16,r17,r18,r19)
        j AFTERSORT
        nop
        //* else { r16 <= r19 }
ELSE161:
        Shift2(r16,r17,r18)
        j AFTERSORT
        nop
        //* else { r16 <= r18 }
ELSE160:
        Shift1(r16,r17)

        //***************************************
        //**** END SORT                      ****
        //***************************************

AFTERSORT:
        //******************************************
        //**** BEGIN SORTED MAX/MIN CALCULATION ****
        //******************************************
        //* max/min given the usage is higher than capacity.
        //* As written, the flow count K is fixed at 3: the code jumps straight
        //* to MAXMIN22 with fairshare = capacity/3, so only r22, r23 and r24
        //* are examined.  From here on r16, r18 and r19 are reused as
        //* scratch / r18_FAIRSHARE / r19_RUNNINGCAP.

        //* find first non-zero entry so that we know how many flows we are dealing with.
        //*     bgtz r16, MAXMIN16
        //*     nop
        //*     bgtz r17, MAXMIN17
        //*     nop
        //*     bgtz r18, MAXMIN18
        //*     nop
        //*     bgtz r19, MAXMIN19      //* FIX ME check that these
        //*     nop                     //* are correctly zero.

        orui r19_RUNNINGCAP, r0, CAPACITYU              //* load upper capacity
        ori r19_RUNNINGCAP, r19_RUNNINGCAP, CAPACITYL   //* load lower capacity
        orui r16, r0, CAPACITYDIV3U     //load upper capacity div 3
        ori r16, r16, CAPACITYDIV3L     //load lower capacity div 3

        //      ram r18_FAIRSHARE, r19_RUNNINGCAP, 2,2,0        //* divide capacity by 4. fairshare = C/k
        j MAXMIN22
        or r18_FAIRSHARE, r0, r16       //* divide capacity by 3. fairshare = C/k
                                        //* (executes before the jump takes effect)

        //      bgtz r21, MAXMIN21      //* K = 4
        //      ram r18_FAIRSHARE, r19_RUNNINGCAP, 2,2,0        //* divide capacity by 4. fairshare = C/k
        //      bgtz r22, MAXMIN22      //* K = 3
        //      or r18_FAIRSHARE, r0, r16       //* divide capacity by 3. fairshare = C/k
        //      bgtz r23, MAXMIN23      //* K = 2
        //      ram r18_FAIRSHARE, r19_RUNNINGCAP, 1,1,0        //* divide capacity by 2. fairshare = C/k
        //      bgtz r24, MAXMIN24      //* K = 1
        //      ram r18_FAIRSHARE, r19_RUNNINGCAP, 0,0,0        //* divide capacity by 1. fairshare = C/k

        //* NOTE(review): no live branch to MAXMIN21 is visible in this span (the
        //* only reference is commented out above), so MAXMIN21 through DIV3DONE
        //* appears to be unreachable.  If it were entered, the JR r31 at
        //* DIV3DONE would return through r31 rather than continue to MAXMIN22.
MAXMIN21:
        sub r14, r18_FAIRSHARE, r21
        bltz r14, REPLACE               //* IF the current flow counter in R21 has a value
        nop                             //* strictly GREATER THAN the current fair share
                                        //* (fairshare - r21 < 0), we have the max/min fair
                                        //* share and we can jump to REPLACE.

        //* Debug: on the node whose ring address is 2, count passes through
        //* here in the word at 0x1390.
        ori r13, r0, 2
        bne r15_MYRINGADDR, r13, MAX0
        nop
        lw r13, 0x1390(r0)
        nop
        addi r13, r13, 1
        sw r13, 0x1390(r0)

MAX0:
        //* OTHERWISE, we subtract the counter of this flow from the running capacity
        //* total that is available for share among the rest of the flows. Then we
        //* divide it between those flows.
        //* K = K-1 = 3
        sub r19_RUNNINGCAP, r19_RUNNINGCAP, r21 //* running capacity total = C - r21

        //* divide runningcap by K means divide r19 by 3.
        //* put it in r18_FAIRSHARE.
        //* Approximates 1/3 by four rounds of add-then-shift-right-by-2, then
        //* rounds up based on the remainder bits.
DIV3:
        ORI r13, r0, 0x0                // Clear the result
        RAM r14, r19_RUNNINGCAP, 23, 15, 9      // Put multiplicand in bits [16:9]
        ADDU r13, r13, r14              // Add for the one in 1/3
        SRL r13, r13, 2                 // Shift result right by 2
        ADDU r13, r13, r14              // Add for the one in 1/3
        SRL r13, r13, 2                 // Shift result right by 2
        ADDU r13, r13, r14              // Add for the one in 1/3
        SRL r13, r13, 2                 // Shift result right by 2
        ADDU r13, r13, r14              // Add for the one in 1/3
        SRL r13, r13, 2                 // Shift result right by 2
        BBIN r13(8), DIV3DONE           // Test for upper remainder bit
        BMB0 r13, r0, DIV3DONE          // Test rest of remainder
        NOP
        ADDIU r13, r13, 0x100           // Increment the result
DIV3DONE:
        JR r31                          // Return from subroutine
        RAM r18_FAIRSHARE, r13, 9, 24, 0        // Align result and put it in r18_FAIRSHARE

MAXMIN22:
        sub r14, r18_FAIRSHARE, r22
        bltz r14, REPLACE               //* IF the current flow counter in R22 has a value
        nop                             //* strictly GREATER THAN the current fair share
                                        //* (fairshare - r22 < 0), we have the max/min fair
                                        //* share and we can jump to REPLACE.

        //* OTHERWISE, we subtract the counter of this flow from the running capacity
        //* total that is available for share among the rest of the flows. Then we
        //* divide it between those flows.
//* Sorted max/min, continued (fall-through from the MAXMIN22 test), followed by
//* the start of the sortless max/min used when total usage <= capacity.

        //* K = K-1 = 2
        sub r19_RUNNINGCAP, r19_RUNNINGCAP, r22         //* running capacity total -= r22
        ram r18_FAIRSHARE, r19_RUNNINGCAP, 1,1,0        //* runningcap divided by k

MAXMIN23:
        sub r14, r18_FAIRSHARE, r23
        bltz r14, REPLACE               //* IF the current flow counter in R23 has a value
        nop                             //* strictly GREATER THAN the current fair share
                                        //* (fairshare - r23 < 0), we have the max/min fair
                                        //* share and we can jump to REPLACE.

        //* OTHERWISE, we subtract the counter of this flow from the running capacity
        //* total that is available for share among the rest of the flows. Then we
        //* divide it between those flows.
        //* K = K-1 = 1
        sub r19_RUNNINGCAP, r19_RUNNINGCAP, r23         //* running capacity total -= r23
        ram r18_FAIRSHARE, r19_RUNNINGCAP, 0,0,0        //* runningcap divided by k

MAXMIN24:
        //* at this point, we are down to one flow and we have the fair share.
        j REPLACE
        nop

        //******************************************
        //**** END SORTED MAX/MIN CALCULATION   ****
        //******************************************

NOSORT:
        //********************************************
        //**** BEGIN SORTLESS MAX/MIN CALCULATION ****
        //********************************************
        //* note: r13 holds excess capacity, as a negative number
        //* (or zero: this path is taken when r13 <= 0)

        //* find max: r10 tracks the largest of r16..r24 seen so far
        //* First testing r16 & r17
        sub r14, r16, r17
        blez r14, MAX17
        nop                             //* FIX ME unnecessary nop
        or r10, r0, r16
        j TEST18
        nop
MAX17:
        or r10, r0, r17

TEST18:
        sub r14, r10, r18
        bgez r14, TEST19
        nop                             //* FIX ME these nops could be removed
        or r10, r0, r18                 //* if we move the "sub" instructions up.
//* Sortless max scan (continued), REPLACE (scale the fair share and store it
//* in the control message), debug accumulation, and the start of the rate
//* calculation (loading neighbours' entries from the control message).

TEST19:
        sub r14, r10, r19
        bgez r14, TEST20
        nop
        or r10, r0, r19
TEST20:
        sub r14, r10, r20
        bgez r14, TEST21
        nop
        or r10, r0, r20
TEST21:
        sub r14, r10, r21
        bgez r14, TEST22
        nop
        or r10, r0, r21
TEST22:
        sub r14, r10, r22
        bgez r14, TEST23
        nop
        or r10, r0, r22
TEST23:
        sub r14, r10, r23
        bgez r14, TEST24
        nop
        or r10, r0, r23
TEST24:
        sub r14, r10, r24
        bgez r14, ADDIDLE
        nop
        or r10, r0, r24
        //* r10 holds max value

ADDIDLE:
        //* recall r13 holds excess capacity, as a negative number
        j REPLACE
        sub r18_FAIRSHARE, r10, r13     //* adding the idle time to the max value
                                        //* (max - negative excess; runs before the jump takes effect)

        //********************************************
        //**** END SORTLESS MAX/MIN CALCULATION   ****
        //********************************************

REPLACE:
        //* r18_FAIRSHARE holds new entry.
        //* **** REPLACE TABLE ENTRY ****

        // following for Rate = (1/8)*(byte count)
        //      ram r18_FAIRSHARE, r18_FAIRSHARE, 16,24,0       //* fairshare (truncated, shifted right by 8 bits)
                                                                //* fair share is only 10 bits.
                                                                //* FIX ME currently using only 8 bits
        // following for Rate = (1/4)*(byte count)
        //      ram r18_FAIRSHARE, r18_FAIRSHARE, 6,24,0
        //following for Rate = (1/2)*(byte count)
        ram r18_FAIRSHARE, r18_FAIRSHARE, 7,24,0

//DEBUG
        //* Debug accumulation of the fair share into COUNTERSUM slots:
        //*   node 2: always into +0x28, and into +0x68 once the RTCNT word >= 0x2a00
        //*   nodes 3, 4, 5: into +0x50, +0x58, +0x60 once the RTCNT word >= 0x2a00
        //* Uses r8, r13, r14.
        ori r13, r0, 2
        bne r15_MYRINGADDR, r13, CSUM20
        nop
        ori r13, r0, COUNTERSUM
        lw r8, 0x28(r13)
        nop
        addu r8, r8, r18_FAIRSHARE
        sw r8, 0x28(r13)
        lw r13, RTCNT(r0)
        ori r14, r0, 0x2a00
        sub r13, r13, r14
        bltz r13, CSUM23
        nop
        ori r13, r0, COUNTERSUM
        lw r8, 0x68(r13)
        nop
        addu r8, r8, r18_FAIRSHARE
        j CSUM23
        sw r8, 0x68(r13)                //* store executes before the jump takes effect

CSUM20:
        lw r13, RTCNT(r0)
        ori r14, r0, 0x2a00
        sub r13, r13, r14
        bltz r13, CSUM23
        nop
        ori r13, r0, 3
        bne r15_MYRINGADDR, r13, CSUM21
        ori r13, r0, COUNTERSUM         //* runs on both paths; CSUM21 reloads r13
        lw r8, 0x50(r13)
        nop
        addu r8, r8, r18_FAIRSHARE
        j CSUM23
        sw r8, 0x50(r13)

CSUM21:
        ori r13, r0, 4
        bne r15_MYRINGADDR, r13, CSUM22
        ori r13, r0, COUNTERSUM         //* runs on both paths; CSUM22 reloads r13
        lw r8, 0x58(r13)
        nop
        addu r8, r8, r18_FAIRSHARE
        j CSUM23
        sw r8, 0x58(r13)

CSUM22:
        ori r13, r0, 5
        bne r15_MYRINGADDR, r13, CSUM23
        ori r13, r0, COUNTERSUM         //* runs on both paths; CSUM23 reloads r13
        lw r8, 0x60(r13)
        nop
        addu r8, r8, r18_FAIRSHARE
        j CSUM23
        sw r8, 0x60(r13)

CSUM23:
        ram r13, r15_MYRINGADDR, 30,27,2        //* current node's ring address times four accesses the
                                                //* correct place in the control message
        add r13, r1HAD, r13             //* r13 has this node's pointer into the control message
                                        //* it is offset from r1HAD by the address of the current node
        sw r18_FAIRSHARE, CNTRL(r13)
        nop

RATES:
        //*******************************************
        //* ********* BEGIN CALCULATE RATES *********
        //*******************************************

        //*****************************
        //* *** load virtual times from control message
        bbi r11_CONFIGTABLE(30), WEST   //* if bit was set, the control packet was on the
                                        //* east ring. therefore, set loop to retrieve west
                                        //* ring virtual time values.
        or r20, r0, r18_FAIRSHARE       //* setup: put this node's control message entry in r20
                                        //* (runs on both paths)
        j LOADVTS
        ori r13, r0, 1                  //* will increment in the positive direction for east ring v(t)
WEST:
        ori r13, r0, -1                 //* will increment in the negative direction for west ring v(t)
                                        //* NOTE(review): the address is reduced to its low three
                                        //* bits by the ram below, so only the step modulo 8
                                        //* matters here.

LOADVTS:
        or r14, r0, r15_MYRINGADDR      //* hold MY RING ADDR here for manipulation in the
                                        //* load v(t)s code.

        //* the load v(t)s code!
        //* load v(t) for the node which is one hop away into r21.
        add r14, r14, r13               //* r14 gets ring address which is exactly one hop away from
                                        //* the current node.
        ram r16, r14, 30,27,2           //* keep only lowest three bits, and then multiply by four.
        add r16, r16, r1HAD             //* add r1HAD for the correct base address in local memory
        lw r21, CNTRL(r16)              //* load into 21.

        //* load v(t) for the node which is two hops away into r22.
        add r14, r14, r13               //* r14 gets ring address which is exactly two hops away from
                                        //* the current node.
        ram r16, r14, 30,27,2           //* keep only lowest three bits, and then multiply by four.
        add r16, r16, r1HAD             //* add r1HAD for the correct base address in local memory
        lw r22, CNTRL(r16)              //* load into 22.

        //* load v(t) for the node which is three hops away into r23.
        add r14, r14, r13               //* r14 gets ring address which is exactly three hops away from
                                        //* the current node.
//* Rate calculation.  Finishes loading the three-hop entry, then computes the
//* per-distance rates as running minima of the control-message entries:
//*   Q1 = v0, Q2 = min(v0,v1), Q3 = min(v0,v1,v2), Q4 = min(v0,v1,v2,v3)
//* with v0..v3 in r20..r23 and Q1..Q4 written to r16..r19.  r13 is scratch.
        ram r16, r14, 30,27,2           //* keep only lowest three bits, and then multiply by four.
        add r16, r16, r1HAD             //* add r1HAD for the correct base address in local memory
        lw r23, CNTRL(r16)              //* load into 23.
        nop

        //* control message entries are in registers as follows:
        //* ascending register number gets further links...
        //* r20 = closest link, r21 next, r22 next, r23 furthest link.
        //* when the calculation is complete,
        //* r16 = rate for closest node, r17 next, r18 next, r19 rate for furthest node.

        //* if v3 < v2
        sub r13, r23, r22
        bgez r13, ELSEQ40
        nop
        //* if v3 < v1
        sub r13, r23, r21
        bgez r13, ELSEQ41
        nop
        //* if v3 < v0
        sub r13, r23, r20
        bgez r13, ELSEQ42
        //* set Q4 = v3
        or r19, r0, r23                 //* directly follows the bgez, so it also runs when the
                                        //* branch is taken; ELSEQ42 overwrites r19
        j DONEQ4
        nop

ELSEQ42:
        //* else { v0 smallest }
        //* set Q4=Q3=Q2=Q1 = v0
        or r19, r0, r20
        or r18, r0, r20
        or r17, r0, r20
        or r16, r0, r20
        j DONEALL
        nop

ELSEQ41:
        //* else { v1 <= v3 < v2 }
        //* if v1 < v0
        sub r13, r21, r20
        bgez r13, ELSEQ43
        nop
        //* set Q4=Q3=Q2 = v1
        //* set Q1 = v0
        or r19, r0, r21
        or r18, r0, r21
        or r17, r0, r21
        or r16, r0, r20
        j DONEALL
        nop

ELSEQ43:
        //* else { v0 <= v1 <= v3 < v2 }
        //* set Q4=Q3=Q2=Q1 = v0
        or r19, r0, r20
        or r18, r0, r20
        or r17, r0, r20
        or r16, r0, r20
        j DONEALL
        nop

ELSEQ40:
        //* else { v2 <= v3 }
        //* if v2 < v1
        sub r13, r22, r21
        bgez r13, ELSEQ44
        nop
        //* if v2 < v0
        sub r13, r22, r20
        bgez r13, ELSEQ45
        nop
        //* set Q4=Q3 = v2
        or r19, r0, r22
        or r18, r0, r22
        j DONEQ3
        nop

ELSEQ45:
        //* else { v0 smallest }
        //* set Q4=Q3=Q2=Q1 = v0
        or r19, r0, r20
        or r18, r0, r20
        or r17, r0, r20
        or r16, r0, r20
        j DONEALL
        nop

ELSEQ44:
        //* else { v1 <= v2 <= v3 }
        //* if v1 < v0
        sub r13, r21, r20
        bgez r13, ELSEQ46
        nop
        //* set Q4=Q3=Q2 = v1
        //* set Q1 = v0
        or r19, r0, r21
        or r18, r0, r21
        or r17, r0, r21
        or r16, r0, r20
        j DONEALL
        nop

ELSEQ46:
        //* else { v0 <= v1 <= v2 <= v3 }
        //* set Q4=Q3=Q2=Q1 = v0
        or r19, r0, r20
        or r18, r0, r20
        or r17, r0, r20
        or r16, r0, r20
        j DONEALL
        nop

DONEQ4:
        //* Q4 is settled (= v3); we can ignore v3
        //* if v2 < v1
        sub r13, r22, r21
        bgez r13, ELSEQ30
        nop
        //* if v2 < v0
        sub r13, r22, r20
        bgez r13, ELSEQ31
        nop
        //* set Q3 = v2
        or r18, r0, r22
        j DONEQ3
        nop

ELSEQ31:
        //* else { v0 <= v2 < v1 }
        //* set Q3=Q2=Q1 = v0
        or r18, r0, r20
        or r17, r0, r20
        or r16, r0, r20
        j DONEALL
        nop

ELSEQ30:
        //* else { v1 <= v2 }
        //* if v1 < v0
        sub r13, r21, r20
        bgez r13, ELSEQ32
        nop
        //* set Q3=Q2 = v1
        //* set Q1 = v0
        or r18, r0, r21
        or r17, r0, r21
        or r16, r0, r20
        j DONEALL
        nop

ELSEQ32:
        //* else { v0 <= v1 <= v2 }
        //* set Q3=Q2=Q1 = v0
        or r18, r0, r20
        or r17, r0, r20
        or r16, r0, r20
        j DONEALL
        nop

DONEQ3:
        //* Q4 and Q3 are settled; we can ignore v3 and v2
        //* if v1 < v0
        sub r13, r21, r20
        bgez r13, ELSEQ20
        nop
        //* set Q2 = v1
        //* set Q1 = v0
        or r17, r0, r21
        or r16, r0, r20
        j DONEALL
        nop

ELSEQ20:
        //* else { v0 <= v1 }
        //* set Q2=Q1 = v0
        or r17, r0, r20
        or r16, r0, r20

        //*******************************************
        //* *********  END CALCULATE RATES  *********
        //*******************************************

DONEALL:
        //* Enabling rate limiters.  this is required only if we must write the entire
        //* word for a queue in QTABLE2 in order to change the rate limit.
        //FIXME we don't still need this.
        ori r16, r16, 0x800             //* enable rate limiter by setting bit 11.
        ori r17, r17, 0x800             //* enable rate limiter by setting bit 11.
        ori r18, r18, 0x800             //* enable rate limiter by setting bit 11.
        ori r19, r19, 0x800             //* enable rate limiter by setting bit 11.

        //* ********* FIND RATE ADDRESSES *********
        //* Find addresses for writing information.
        //* put address for r19 in r23, addr for r18 in r22, etc.
        //* qtable2 addresses are offset from the pom base...
//* Compute the four QTABLE2 rate-limit addresses (r20..r23), write the rates
//* r16..r19 to them, then begin releasing the control packet (SENDCONTROL).
//*
//*     0x400 + ( queue# * 8 + port ) * 4 + 0x2
//*
//* pom base is dependent on pim id information
//* queue# is dependent on DESTADDR for each flow,
//*   each could also be considered as increasing offset from MYADDR
//*   if considered this way, the offset would be
//*   positive for eastbound flows and negative for westbound flows
//* port is dependent on srcport information

        //* Build base address in r24
        ram r24, r15_MYRINGADDR, 30,0,2 //* port information * four
        addi r24, r24, 0x402            //* add 0x402
        orui r24, r24, POMBASE          //* upper half of address for pom

        bbi r11_CONFIGTABLE(30), WESTRL //* if bit was set, the control packet was on the
                                        //* east ring. therefore, set loop to calculate west
                                        //* ring queue locations.
        or r14, r0, r15_MYRINGADDR      //* hold MY RING ADDR +/- 1 in r14 as the first iteration
                                        //* (runs on both paths)
        ori r24, r24, POMAOFF           //* POM A base for east ring
        j FINDRLADDR
        ori r13, r0, 1                  //* will increment in the positive direction for east ring
WESTRL:
        ori r24, r24, POMWEST           //* POM B base for west ring
        ori r13, r0, -1                 //* will increment in the negative direction for west ring

FINDRLADDR:
        //* r14 steps one hop at a time in direction r13; each address is
        //* (queue# reduced to three bits) * 32 + base.
        add r14, r14, r13               //* proper starting point for queue #
        ram r20, r14, 27,24,5           //* multiply by eight * four (32)
                                        //* and lose all but three address bits
        add r20, r20, r24               //* add base

        add r14, r14, r13               //* increment to next queue
        ram r21, r14, 27,24,5           //* multiply by eight * four (32)
                                        //* and lose all but three address bits
        add r21, r21, r24               //* add base

        add r14, r14, r13               //* increment to next queue
        ram r22, r14, 27,24,5           //* multiply by eight * four (32)
                                        //* and lose all but three address bits
        add r22, r22, r24               //* add base

        add r14, r14, r13               //* increment to next queue
        ram r23, r14, 27,24,5           //* multiply by eight * four (32)
                                        //* and lose all but three address bits
        add r23, r23, r24               //* add base

        //*******************************************
        //******        BEGIN SET RATES        ******
        //*******************************************
        // addr for r19 in r23, addr for r18 in r22,
        // addr for r17 in r21, addr for r16 in r20

        //* FIX ME -- what's the best way?
        //* perhaps r13 gets the offset from base for storage ?
        //* for now, each rate is staged in local memory and transferred
        //* one at a time.  (The original note says 0x11c0; the code below
        //* stages at 0x1200..0x121c.)
        //*
        //*     sb r19, RATE_STRG(r13)  //* or we could use this ?

        //* Each rate is stored twice (two consecutive words), r30 is pointed
        //* at the staging area, and wbr30 writes it to the queue address.

        //* store rate limit r19
        sw r19, 0x1200(r0)
        sw r19, 0x1204(r0)
        ori r30, r0, 0x1200
        wbr30 r23, 0, 2

        //* store rate limit r18
        sw r18, 0x1208(r0)
        sw r18, 0x120c(r0)
        ori r30, r0, 0x1208
        wbr30 r22, 0, 2

        //* store rate limit r17
        sw r17, 0x1210(r0)
        sw r17, 0x1214(r0)
        ori r30, r0, 0x1210
        wbr30 r21, 0, 2

        //* store rate limit r16
        sw r16, 0x1218(r0)
        sw r16, 0x121c(r0)
        ori r30, r0, 0x1218
        wbr30 r20, 0, 2
        trapqne

        //*******************************************
        //******         END SET RATES         ******
        //*******************************************

SENDCONTROL:
        //*******************************************
        //* ****          BEGIN COUNT            ****
        //*******************************************
        jal SaveHdrBfr
        nop
        IncrementCounter CTRLPKTS
        //*******************************************
        //* ****           END COUNT             ****
        //*******************************************
        //* ****         QUEUE AND SEND          ****
        //*******************************************

        //* a copy of above code to release pkt
        bbin r5UID(PHBIT), EMITPKT3     // does pkt have payload?
        ori r6LOD, r0, 0                // no -- r6LOD for HEADER packet
        bbi r5UID(LHBIT), LONGPKT3      // is pkt LONG?
//* Control-packet release, continued: build the POM output descriptor, pick
//* the output POM/queue from the config table and DMA the packet out.
//* The first instruction below directly follows the "is pkt LONG?" bbi, so it
//* runs on both the LONG and the COMPLETE path.
        andoi r2HBB, r1HAD, 0xFF80      // Compute the Header Buffer Base if needed
                                        // To find the ICM
        j EMITPKT3                      // no -- it's COMPLETE
        ram r6LOD, r4LID, 0, 9, 16      // r6LOD for COMPLETE packet

LONGPKT3:
        //* NOTE(review): this still uses the lw / r18 form that the comments at
        //* LONGPKT2 describe as "old code" (replaced there by the upper ICM
        //* word in r19).  Confirm which ICM word is correct here.
        lw r18, ICM(r2HBB)              // load ICM
        nop                             // wait for ICM load to complete
        ram r6LOD, r18, 0, 0, 16        // r6LOD for LONG packet

EMITPKT3:
        ori r6LOD, r6LOD, 0x000         //* set 0 byte offset in order to keep the ring header
                                        //* and the stuff bytes in the
                                        // lower output descriptor
        addi r7UOD, r5UID, 0            // Copy r5UID into r7UOD
                                        //* don't change the length because we need the ring header
        sdw r6LOD, PODU(r1HAD)          // Save the first word64 into
                                        // the output descriptor
        add r7UOD, r0, r0               // Clear the first word32
        add r6LOD, r0, r0               // Clear the second word32
        sdw r6LOD, PODM1(r1HAD)         // Save the second word64 into
                                        // the output descriptor
        BBI r5UID(ERBIT), ERROR         // Branch if any packet error

        ram r14, r11_CONFIGTABLE, 14,30,0       //* get output POM from config table
                                                //* (directly follows the BBI, so it runs either way)
        beq r14, r0, SENDRINGPKT3       //* if the POM bits are zero, put the pkt on poma, whose
                                        //* base address is already stored
        ram r13, r11_CONFIGTABLE, 21,23,3       //* output queue from config table times 8
                                                //* (runs on both paths)

        //* fall through to POMB Packet processing
        addi r13, r13, SBWEST           //* add 0x1000 to the poma base address in order to reach the pomb base

SENDRINGPKT3:
        add r13, r13, r27SBB            // add the smart buffer base
                                        // address. R13 now contains
                                        // the address of the queue
                                        // this packet should be written
                                        // into.
        addi r1HAD, r1HAD, -8           // Adjust the Header Address to point to the Output
                                        // Descriptor instead of the Input Descriptor
        pkrlr1 r13, 0, 15               // Send packet
                                        //* 15 word 64s is the size of the DMA transfer,
                                        //* probably always large enough
        nop
        jr r25                          //* return to get next packet.
//* Instruction following the jr r25 that ends control-packet processing.
        nop

//******************************
//*
//* END CONTROL PACKET PROCESSING
//*
//******************************

//* STRIP_PKT: currently a debug trap -- spins forever at StopLoop1, so the
//* jr r25 after the loop is never reached.
STRIP_PKT:
StopLoop1:
        j StopLoop1
        nop
        jr r25
        nop

//* SAMEPORT: count the packet under SMPRTDIS, send it to the discard queue
//* and return through r25.
SAMEPORT:
        IncrementCounter SMPRTDIS
        addi r11, r27SBB, DISCARD       // Discard queue address
        addi r1HAD, r1HAD, -8           // Adjust the Header Address to point to the Output
                                        // Descriptor instead of the Input Descriptor
        pkrlr1 r11, 0, 15               // Discard packet
        nop
        jr r25                          // Go process the next packet
        nop

//* ERROR_ILLEGAL_TYPE: currently a debug trap -- spins forever at StopLoop2,
//* so the discard sequence after the loop is never reached.
ERROR_ILLEGAL_TYPE:
StopLoop2:
        j StopLoop2
        nop
        //* error where the type field in the ring header is not one we've defined.
        //* perhaps later we will count these
        addi r11, r27SBB, DISCARD       // Discard queue address
        addi r1HAD, r1HAD, -8           // Adjust the Header Address to point to the Output
                                        // Descriptor instead of the Input Descriptor
        pkrlr1 r11, 0, 15               // Discard packet
        nop
        jr r25                          // Go process the next packet
        nop

//* ERROR_HOST_PKT: count under ERRPKT, discard, return through r25.
//* The body is identical to ERROR below.
ERROR_HOST_PKT:
        //* error where a newly injected packet had its own source address as the destination.
        //* perhaps later we will count these.
        IncrementCounter ERRPKT
        addi r11, r27SBB, DISCARD       // Discard queue address
        addi r1HAD, r1HAD, -8           // Adjust the Header Address to point to the Output
                                        // Descriptor instead of the Input Descriptor
        pkrlr1 r11, 0, 15               // Discard packet
        nop
        jr r25                          // Go process the next packet
        nop

//* ERROR: generic packet-error exit (reached when the ERBIT test on the input
//* descriptor succeeds).  Count under ERRPKT, discard, return through r25.
ERROR:
        IncrementCounter ERRPKT
        addi r11, r27SBB, DISCARD       // Discard queue address
        addi r1HAD, r1HAD, -8           // Adjust the Header Address to point to the Output
                                        // Descriptor instead of the Input Descriptor
        pkrlr1 r11, 0, 15               // Discard packet
        nop
        jr r25                          // Go process the next packet
        nop

//* OURPKT / EXIT: return through r25 without touching the packet.
OURPKT:
EXIT:
        jr r25
        nop

//
// Forward aux packets: DMA aux packet to host queue, then increment semaphore1
//
AuxPkt:
        andoi r1HAD, r1HAD, 0xFF80      // Compute the Header Buffer Base
        traprel r1HAD                   // Wait for order dependency to clear

        // DMA pkt to MIPS aux queue and do dummy DMA to increment Semaphore 1
        pkrlr1 r7QADDR, 0, HOST_FWD_LEN // DMA IHD/header to SBUF QAddr
        wbr1 r13SEMA1, 0, SEMA_SIZ      // Increment semaphore 1
        jr r25
        nop

//
// Save a header buffer to main memory
//
//* SaveHdrBfr: called with jal, returns through r31.  Does nothing unless
//* bit 0 of LOGENABLE is set.  Otherwise advances the word at HBADDR by 128
//* and writes the header buffer (r1HAD rounded down to a 128-byte boundary)
//* to that address.  Uses r19 and r30.
SaveHdrBfr:
        ori r19, r0, LOGENABLE          // Get the log enable constant
        bbin r19(0), DoNotLog           // Test it
        lw r19, HBADDR(r0)              // Get the current counter
                                        //* (directly follows the bbin, so it runs either way)
        nop
        addiu r19, r19, 128             // Increment to the next buffer
        sw r19, HBADDR(r0)              // Write the counter back
        andoi r30, r1HAD, 0xff80        // Point to the start of the Header Buffer
        wxr30 r19, 0, 0                 // Write out the entire HB
        trapqne
DoNotLog:
        jr r31                          // Return from subroutine
        nop

//* SaveCP: same as SaveHdrBfr but gated on LOGENABLE2, and it additionally
//* stops logging once bit 14 of the HBADDR counter is set.  It shares the
//* HBADDR counter with SaveHdrBfr.
//* NOTE(review): SaveHdrBfr has no such upper bound -- confirm that is
//* intended.
SaveCP:
        ori r19, r0, LOGENABLE2         // Get the log enable constant
        bbin r19(0), DoNotLog2          // Test it
        lw r19, HBADDR(r0)              // Get the current counter
        nop
        bbi r19(14), DoNotLog2          //* log area exhausted: skip
        nop
        addiu r19, r19, 128             // Increment to the next buffer
        sw r19, HBADDR(r0)              // Write the counter back
        andoi r30, r1HAD, 0xff80        // Point to the start of the Header Buffer
        wxr30 r19, 0, 0                 // Write out the entire HB
        trapqne
DoNotLog2:
        jr r31                          // Return from subroutine
        nop