

## 6.004 Recitation Problems

### L14 – RISC-V Processor

# Single-Cycle RISC-V Processor

---



**Diagram 1: Abstract Version**

- Contains all major components: PC Register, Instruction and Data memories, Decode logic, Execute logic (including the ALU), and Register file
- Skips some details in the wiring of signals to increase readability



**Diagram 2: Detailed Version**

- Also contains all of the major components, as well as additional details like muxes on inputs, descriptions of various signal contents and bit widths
- Both diagrams are equivalent! Differ only in level of detail

## RISC-V Processor: Components

Refer to the 6.004 ISA Reference Tables (Website > Resources) for details about each instruction.

### ProcTypes

```
typedef Bit#(32) Word;

// Branch function enumeration
typedef enum {
    Eq,
    Neq,
    Lt,
    Ltu,
    Ge,
    Geu,
    Dbr
} BrFunc;

// Return type for decode()
typedef struct {
    IType iType;
    AluFunc aluFunc;
    BrFunc brFunc;
    Maybe#(RIndx) dst;
    RIndx src1;
    RIndx src2;
    Word imm;
} DecodedInst;

// Register Index Type
typedef Bit#(5) RIndx;

// Register File writes
typedef struct {
    RIndx index;
    Word data;
} RegWriteArgs;

// Return type for execute()
typedef struct {
    IType iType;
    Maybe#(RIndx) dst;
    Word data;
    Word addr;
    Word nextPc;
} ExecInst;

// Memory writes
typedef struct {
    Word addr;
    Word data;
} MemWriteReq;
```

```

// Opcode
Bit#(7) opOpImm = 7'b0010011;
Bit#(7) opOp     = 7'b0110011;
Bit#(7) opLui    = 7'b0110111;
Bit#(7) opJal    = 7'b1101111;
Bit#(7) opJalr   = 7'b1100111;
Bit#(7) opBranch = 7'b1100011;
Bit#(7) opLoad   = 7'b0000011;
Bit#(7) opStore  = 7'b0100011;
Bit#(7) opAuipc  = 7'b0010111;

// funct3 - ALU
Bit#(3) fnADD   = 3'b000;
Bit#(3) fnSLL   = 3'b001;
Bit#(3) fnSLT   = 3'b010;
Bit#(3) fnSLTU  = 3'b011;
Bit#(3) fnXOR   = 3'b100;
Bit#(3) fnSR    = 3'b101;
Bit#(3) fnOR    = 3'b110;
Bit#(3) fnAND   = 3'b111;

// funct3 - Branch
Bit#(3) fnBEQ   = 3'b000;
Bit#(3) fnBNE   = 3'b001;
Bit#(3) fnBLT   = 3'b100;
Bit#(3) fnBGE   = 3'b101;
Bit#(3) fnBLTU  = 3'b110;
Bit#(3) fnBGEU  = 3'b111;

// funct3 - Load
Bit#(3) fnLW    = 3'b010;
Bit#(3) fnLB    = 3'b000;
Bit#(3) fnLH    = 3'b001;
Bit#(3) fnLBU   = 3'b100;
Bit#(3) fnLHU   = 3'b101;

// funct3 - Store
Bit#(3) fnSW    = 3'b010;
Bit#(3) fnSB    = 3'b000;
Bit#(3) fnSH    = 3'b001;

// funct3 - JALR
Bit#(3) fnJALR  = 3'b000;

```

## Register File

```

module RegisterFile;
  Vector#(32, Reg#(Word)) regs(0);

  method Word rd1(RIndx rindx) = regs[rindx];
  method Word rd2(RIndx rindx) = regs[rindx]; } outputs

  input Maybe#(RegWriteArgs) wr default = Invalid; } inputs

  rule rfWrite;
    if (isValid(wr)) begin
      RegWriteArgs rwd = fromMaybe(?, wr);
      if (rwd.index != 0)
        regs[rwd.index] <= rwd.data;
    end
  endrule
endmodule

```

glorified array  
you read from

if isValid(wr),  
we write something

takes 32-bit word & splits  
it up to decode

## Decode

```
function DecodedInst decode(Bit#(32) inst);
    let opcode = inst[6:0];
    let funct3 = inst[14:12];
    let funct7 = inst[31:25];
    let dst    = inst[11:7];
    let src1   = inst[19:15];
    let src2   = inst[24:20];

    Maybe #(RIdx) validDst = Valid(dst);
    Maybe #(RIdx) dDst = Invalid; // default value
    RIdx dSrc = 5'b0;

    // DEFAULT VALUES - Use the following for your default values:
    // dst: dDst, src1: dSrc, src2: dSrc, imm: immD, BrFunc: Dbr, AluFunc: ?

    // We have provided a default value and done immB for you.
    Word immD32 = signExtend(1'b0); // default value
    Bit#(12) immB = { inst[31], inst[7], inst[30:25], inst[11:8] };
    Word immB32 = signExtend({immB, 1'b0});
    Bit#(20) immU = 0; // TODO
    Word immU32 = 0; // TODO
    Bit#(12) immI = 0; // TODO
    Word immI32 = 0; // TODO
    Bit#(20) immJ = 0; // TODO
    Word immJ32 = 0; // TODO
    Bit#(12) immS = 0; // TODO
    Word immS32 = 0; // TODO

    DecodedInst dInst = unpack(0);
    dInst.iType = Unsupported; // unsupported by default

    case (opcode)
        opAuipc: begin
            dInst = DecodedInst {
                iType: AUIPC,
                dst: validDst,
                src1: dSrc,
                src2: dSrc,
                imm: immU32,
                brFunc: Dbr,
                aluFunc: ?
            };
        end
        opLui: // TODO
    end
```

case (opcode)  
→ figures out specific  
instructions & what  
to do w/ them.

decodes 32-bit  
word into a dInst.  
type

### Decode (continued)

```
opOpImm: begin
    dInst.iType = OPIMM;
    dInst.src1 = src1;
    dInst.imm = immI32;
    dInst.dst = validDst;

    case (funct3)
        fnAND : dInst.aluFunc = And; // Decode ANDI instructions
        fnOR  : dInst.iType = Unsupported; // TODO
        fnXOR : dInst.iType = Unsupported; // TODO
        fnADD : dInst.iType = Unsupported; // TODO
        fnSLT : dInst.iType = Unsupported; // TODO
        fnSLTU: dInst.iType = Unsupported; // TODO
        fnSLL : case (funct7)
            7'b0000000: dInst.aluFunc = Sll;
            // Otherwise we must say the instruction is invalid:
            default: dInst.iType = Unsupported;
        endcase
        fnSR : // TODO
            dInst.iType = Unsupported;
            default: dInst.iType = Unsupported;
    endcase
end
opOp: // TODO
opBranch: // TODO
opJal: // TODO
opLoad: // TODO
opStore: // TODO
opJalr: // TODO
endcase
return dInst;
endfunction
```

Operation that includes an Imm value

opOp: operation between two registers

### Branch ALU (Execute.ms)

```
function Bool aluBr(Word a, Word b, BrFunc brFunc);
    Bool res = case (brFunc)
        Eq:      (a == b);
        Neq:     (a != b);
        Lt:      signedLT(a, b);
        Ltu:     (a < b);
        Ge:      signedGE(a, b);
        Geu:     (a >= b);
        default: False;
    endcase;
    return res;
endfunction
```

returns true or  
false depending  
on whether you  
should branch.

### Execute

```
function ExecInst execute(DecodedInst dInst, Word rVal1, Word rVal2, Word pc);
    let imm = dInst.imm;
    let brFunc = dInst.brFunc;
    let aluFunc = dInst.aluFunc;
    let aluVal2 = dInst.iType == OPIMM ? imm : rVal2;

    Word data = case (dInst.iType)
        AUIPC:      pc + imm;
        LUI:         0; // TODO
        OP, OPIMM:   0; // TODO
        JAL, JALR:   0; // TODO
        STORE:       0; // TODO
        default:     0;
    endcase;

    Word nextPc = case (dInst.iType)
        BRANCH: 0; // TODO Replace 0 with the correct expression
        JAL:     0; // TODO Replace 0 with the correct expression
        JALR:   (rVal1 + imm) & ~1; // "& ~1" clears the bottom bit.
        default: pc + 4;
    endcase;

    Word addr = 0; // TODO Replace 0 with the correct expression

    return ExecInst{iType: dInst.iType, dst: dInst.dst, data: data,
                    addr: addr, nextPc: nextPc};
endfunction
```

takes  
decoded  
instruction  
& creates  
an execute  
instruction

think of the data (e.g. Decoded Inst, ExecInst) as pipelined register

## Magic Memory

```
module MagicMemory;
    // 64 KB magic memory array
    MagicMemoryArray magicMem("mem.vmh");

    method Word read(Word addr) = magicMem.sub(truncate(addr >> 2));

    input Maybe#(MemWriteReq) write default = Invalid;

    rule doWrite;
        if (isValid(write)) begin
            MemWriteReq req = fromMaybe(?, write);
            if (req.addr == 'h4000_0000) begin
                // Write character to stdout
                $write("%c", req.data[7:0]);
            end else if (req.addr == 'h4000_0004) begin
                // Write integer to stdout
                $write("%0d", req.data);
            end else if (req.addr == 'h4000_1000) begin
                // Exit simulation
                if (req.data == 0) begin
                    $display("PASSED");
                end else begin
                    $display("FAILED %0d", req.data);
                end
                $finish;
            end else begin
                // Write memory array
                magicMem.upd = ArrayWriteReq{idx: truncate(req.addr >> 2),
                                              data: req.data};
            end
        end
    endrule
endmodule
```

*Data memory that returns result same cycle you request it*

## Processor

```
module Processor;
    Reg#(Word) pc(0);
    RegisterFile rf;
    MagicMemory iMem; // Memory for loading instructions
    MagicMemory dMem; // Memory for loading and storing data

    rule doSingleCycle;
        // Load the instruction from instruction memory (iMem)
        Word inst = 0; // TODO

        // Decode the instruction
        DecodedInst dInst = unpack(0); // TODO

        // Read the register values used by the instruction
        Word rVal1 = 0; // TODO
        Word rVal2 = 0; // TODO

        // Compute all outputs of the instruction
        ExecInst eInst = unpack(0); // TODO

        if (eInst.iType == LOAD) begin
            // TODO: Load from data memory (dMem) if needed
        end else if (eInst.iType == STORE) begin
            // TODO: Store to data memory (dMem) if needed
        end

        if (isValid(eInst.dst)) begin
            // TODO: Write to a register if the instruction requires it
        end

        // TODO: Update pc to the next pc
    endrule
endmodule
```

Uses decode, execute, fetch, writeback & manages them

like a secretary/manager/director  
that manages all their activities

### Problem 1.

Decode the following 32-bit RISC-V instructions:

1. 0100000 00001 00100 000 00011 0110011

*rs2    rs1    rd  
x1    x4    x3*

*Sub x3, x4, x1*

2. 0100000 00101 00010 101 00111 0010011

*shamt    rs1    rd  
(shift amount) x2    x7  
5*

*SRAI x7, x2, 5*

MIT 6.004 ISA Reference Card: Instruction Encodings

| 31           | 25 | 24  | 20                    | 19  | 15 | 14     | 12 | 11          | 7 | 6 | 0      |        |
|--------------|----|-----|-----------------------|-----|----|--------|----|-------------|---|---|--------|--------|
| funct7       |    | rs2 |                       | rs1 |    | funct3 |    | rd          |   |   | opcode | R-type |
| imm[11:0]    |    |     |                       | rs1 |    | funct3 |    | rd          |   |   | opcode | I-type |
| imm[11:5]    |    | rs2 |                       | rs1 |    | funct3 |    | imm[4:0]    |   |   | opcode | S-type |
| imm[12:10:5] |    | rs2 |                       | rs1 |    | funct3 |    | imm[4:1 11] |   |   | opcode | B-type |
|              |    |     | imm[31:12]            |     |    |        |    | rd          |   |   | opcode | U-type |
|              |    |     | imm[20:10:1 11:19:12] |     |    |        |    | rd          |   |   | opcode | J-type |

RV32I Base Instruction Set (MIT 6.004 subset)

|              |                       |     |     |          |             |         |      |
|--------------|-----------------------|-----|-----|----------|-------------|---------|------|
|              | imm[31:12]            |     |     | rd       | 0110111     | LUI     |      |
|              | imm[20:10:1 11:19:12] |     |     | rd       | 1101111     | JAL     |      |
|              | imm[11:0]             |     | rs1 | 000      | rd          | 1100111 | JALR |
| imm[12:10:5] | rs2                   | rs1 |     | 000      | imm[4:1 11] | 1100011 | BEQ  |
| imm[12:10:5] | rs2                   | rs1 |     | 001      | imm[4:1 11] | 1100011 | BNE  |
| imm[12:10:5] | rs2                   | rs1 |     | 100      | imm[4:1 11] | 1100011 | BLT  |
| imm[12:10:5] | rs2                   | rs1 |     | 101      | imm[4:1 11] | 1100011 | BGE  |
| imm[12:10:5] | rs2                   | rs1 |     | 110      | imm[4:1 11] | 1100011 | BLTU |
| imm[12:10:5] | rs2                   | rs1 |     | 111      | imm[4:1 11] | 1100011 | BGEU |
| imm[11:0]    |                       | rs1 | 010 | rd       | 0000011     | LW      |      |
| imm[11:5]    | rs2                   | rs1 | 010 | imm[4:0] | 0100011     | SW      |      |
| imm[11:0]    |                       | rs1 | 000 | rd       | 0010011     | ADD     |      |
| imm[11:0]    |                       | rs1 | 010 | rd       | 0010011     | SLTI    |      |
| imm[11:0]    |                       | rs1 | 011 | rd       | 0010011     | SLTIU   |      |
| imm[11:0]    |                       | rs1 | 100 | rd       | 0010011     | XORI    |      |
| imm[11:0]    |                       | rs1 | 110 | rd       | 0010011     | ORI     |      |
| imm[11:0]    |                       | rs1 | 111 | rd       | 0010011     | ANDI    |      |
| 0000000      | shamt                 | rs1 | 001 | rd       | 0010011     | SLLI    |      |
| 0000000      | shamt                 | rs1 | 101 | rd       | 0010011     | SRLI    |      |
| 0100000      | shamt                 | rs1 | 101 | rd       | 0010011     | SRAI    |      |
| 0000000      | rs2                   | rs1 | 000 | rd       | 0110011     | ADD     |      |
| 0100000      | rs2                   | rs1 | 000 | rd       | 0110011     | SUB     |      |
| 0000000      | rs2                   | rs1 | 001 | rd       | 0110011     | SLL     |      |
| 0000000      | rs2                   | rs1 | 010 | rd       | 0110011     | SLT     |      |
| 0000000      | rs2                   | rs1 | 011 | rd       | 0110011     | SLTU    |      |
| 0000000      | rs2                   | rs1 | 100 | rd       | 0110011     | XOR     |      |
| 0000000      | rs2                   | rs1 | 101 | rd       | 0110011     | SRL     |      |
| 0100000      | rs2                   | rs1 | 101 | rd       | 0110011     | SRA     |      |
| 0000000      | rs2                   | rs1 | 110 | rd       | 0110011     | OR      |      |
| 0000000      | rs2                   | rs1 | 111 | rd       | 0110011     | AND     |      |

Question: Can you make this using existing code? If not, what do you change.

### Problem 2. \*

The RISC-V LUI instruction, Load Upper Immediate, loads a 20-bit immediate into the upper 20 bits of the specified destination register (and sets the lower 12 bits to zero). What modifications to the datapath are needed to implement the LUI instruction?

LUI rd, immU       $reg[rd] \leftarrow immU \ll 12$

$imm[31:12]$  rd 0110111

modifications needed: (their sol'n)

1. extract 20-bit immediate from instruction

[in decode]

2. shift left 12 bits (just add 12 zeros on the right) [in decode]

3. extend final datapath mux (WDSEL) to take new value

their sol'n when

WDSEL = 3,  
do imm12.

our sol'n:  
have ALU do  
nothing

#### Decode

```
function DecodedInst decode(Bit#(32) inst);
    let opcode = inst[6:0];
    let funct3 = inst[14:12];
    let funct7 = inst[31:25];
    let dst = inst[11:7];
    let src1 = inst[19:15];
    let src2 = inst[24:20];

```

```
Maybe#(RIndex) validDst = Valid(dst);
Maybe#(RIndex) dDst = Invalid; // default value
RIndex dSrc = 5'b0;
```

```
// DEFAULT VALUES - Use the following for your default values:
// dst: dDst, src1: dSrc, src2: dSrc, imm: immD, BrFunc: Dbr, AluFunc: ?
```

```
// We have provided a default value and done immB for you.
Word immD32 = signExtend(1'b0); // default value
Bit#(12) immB = { inst[31], inst[7], inst[30:25], inst[11:8] };
Word immB32 = signExtend({immB, 1'b0});
Bit#(20) immU = 0; // TODO
Word immU32 = 0; // TODO
Bit#(12) immI = 0; // TODO
Word immI32 = 0; // TODO
Bit#(20) immJ = 0; // TODO
Word immJ32 = 0; // TODO
Bit#(12) immS = 0; // TODO
Word immS32 = 0; // TODO
```

```
DecodedInst dInst = unpack(0);
dInst.iType = Unsupported; // unsupported by default
```

```
case (opcode)
    opAuipc: begin
        dInst = DecodedInst {
            iType: AUIPC,
            dst: validDst,
            src1: dSrc,
            src2: dSrc,
            imm: immU32,
            brFunc: Dbr,
            aluFunc: ?
        };
    end
    opLui: // TODO

```

add → 0 + {immU, 12'0} comes  
out of ALU

ALU doing nothing is just adding zero to immU



```
function ExecInst execute(DecodedInst dInst, Word rVal1, Word rVal2, Word pc);
    let imm = dInst.imm;
    let brFunc = dInst.brFunc;
    let aluFunc = dInst.aluFunc;
    let aluVal2 = dInst.iType == OPIMM ? imm : rVal2;

    Word data = case (dInst.iType)
        AUIPC: pc + imm;
        LUI: 0; // TODO
        OP, OPIMM: 0; // TODO
        JAL, JALR: 0; // TODO
        STORE: 0; // TODO
        default: 0;
    endcase;

    Word nextPc = case (dInst.iType)
        BRANCH: 0; // TODO Replace 0 with the correct expression
        JAL: 0; // TODO Replace 0 with the correct expression
        JALR: (rVal1 + imm) & ~1; // "& ~1" clears the bottom bit.
        default: pc + 4;
    endcase;

    Word addr = 0; // TODO Replace 0 with the correct expression

    return ExecInst{iType: dInst.iType, dst: dInst.dst, data: data, addr: addr, nextPc: nextPc};
endfunction
```

#### TLDR:

- multiple ways to do this. Some options:
  - 1) in decode, modify to {immU, 12'0}. ALU does nothing (add 0), WDSEL=1
  - 2) in decode, output immU. ALU add logic to <<12. WDSEL=1
  - 3) Argument WDSEL

Goal: Use existing or augment  $\sim$  so that cmvz can work!  
existing wires

Problem 3. ★

We want to add the following instruction to the RISC-V ISA:

cmvz rd, rs1, rs2

This new cmvz instruction is a *conditional move*: it checks the contents of source register rs1, and if it's 0, copies the contents of source register rs2 into rd. In other words:

*very similar to beqz!*  
if (reg[rs1] == 0) begin      thoughts: what if we  
    reg[rd] <= reg[rs2];      hijack the branch logic?  
end

We would like to update the hardware diagram of the single-cycle processor to support this new instruction, reusing the existing **branch ALU** comparison logic to check if rs1 is 0. In the diagram below, we have added extra inputs to the BSEL and WDSEL, and WERFSEL muxes to support reusing the branch ALU in this way.

Assume that when the decoder decodes CMVZ, it outputs BrFunc = Eq, BSEL = 3, WDSEL = 3, and WERFSEL = 2. It also outputs PCSEL = 0 so that the next value of PC will be PC + 4.

# Hijacking Branch to determine if we should write

(Logic for computing PC for branches and jumps omitted)



Summary of some of the relevant processor components in the above diagram:

- One register in the register file can be written to each cycle by providing the register index in `WA` and the data to write in `WD`. The data is only written if the write enable signal, `WE`, is 1.
- The ALU receives two inputs, `A` and `B`. It computes an arithmetic operation on them specified by `AluFunc`, outputting the 32-bit result to `aluResult`, and a comparison specified by `BrFunc`, outputting the 1-bit result to `branch`.

- (a) For each of the missing mux inputs X, Y, and Z, write down either a constant or the name of a different wire or port in the processor hardware diagram that should be connected to each input, so that the processor executes CMVZ correctly.

*there are no X, Y, Zs in the diagram lmao*

*→ I figured out what they are & labeled them ☺*

X: branch

Y: 0

Z: rVal2

Assume that when the decoder decodes CMVZ, it outputs BrFunc = Eq, BSEL = 3, WDSEL = 3, and WERFSEL = 2. It also outputs PCSEL = 0 so that the next value of PC will be PC + 4.

- (b) We would like to check that our processor can still decode and execute the old RISC-V instructions correctly. Suppose the processor is decoding an OPIMM instruction, such as ADDI. For each of the select signals we changed (shown in **bold** in the diagram), write down the correct signals that the decoder should produce for such an instruction.

*this to make sure we didn't mess up the whole thing for other inputs*

Decoded value of BSEL: 1

Decoded value of WDSEL: 1

Decoded value of WERFSEL: 1



#### Problem 4.

(a) The RISC-V MUL instruction multiplies two 32-bit values together and places the **lower** 32 bits of the product in the destination register. It has an opcode of 7'b0110011, which is the same as the other OP type instructions, and a funct7 of 7'b0000001, and a funct3 of 3'b000. What changes to the datapath would be needed to support the MUL instruction? Use the combinational multiplier from Lecture 12.



**Modifications needed:**

1. add multiplier to ALU
2. select lower 32 bits for writeback data

```

Execute
function ExecInst execute(DecodedInst dInst,Word rVal1,Word rVal2,Word pc);
    let imm = dInst.imm;
    let brFunc = dInst.brFunc;
    let aluFunc = dInst.aluFunc;
    let aluVal2 = dInst.iType == OPIMM ? imm : rVal2;
    Bit#(16) produce = multiply-by-adding(rVal1, rVal2);
    Word data = case (dInst.iType)
        ALUPC: pc + imm;
        LUI: 0; // TODO
        OP, OPIMM: 0; // TODO
        JAL, JALR: 0; // TODO
        STORE: 0; // TODO
        default: 0;
    endcase;
    Word nextPc = case (dInst.iType)
        BRANCH: 0; // TODO Replace 0 with the correct expression
        JAL, JALR: 0; // TODO Replace 0 with the correct expression
        JALR: (rVal1 + imm) & ~1; // "& -1" clears the bottom bit.
        default: pc + 4;
    endcase;
    Word addr = 0; // TODO Replace 0 with the correct expression
    return ExecInst{iType: dInst.iType, dst: dInst.dst, data: data,
        addr: addr, nextPc: nextPc};
endfunction

```

(b) The RISC-V MULH instruction performs the same multiplication in (a), but returns the **upper** 32 bits of the product in the destination register. It has the same opcode and funct7 codes, but its funct3 is 3'b001. What modifications to 3(a) would be needed to support the MULH instruction?

```

Execute
function ExecInst execute(DecodedInst dInst,Word rVal1,Word rVal2,Word pc);
    let imm = dInst.imm;
    let brFunc = dInst.brFunc;
    let aluFunc = dInst.aluFunc;
    let aluVal2 = dInst.iType == OPIMM ? imm : rVal2;
    Bit#(16) produce = multiply-by-adding(rVal1, rVal2);
    Word data = case (dInst.iType)
        ALUPC: pc + imm;
        LUI: 0; // TODO
        OP, OPIMM: 0; // TODO
        JAL, JALR: 0; // TODO
        STORE: 0; // TODO
        default: 0;
    endcase;
    Word nextPc = case (dInst.iType)
        BRANCH: 0; // TODO Replace 0 with the correct expression
        JAL, JALR: 0; // TODO Replace 0 with the correct expression
        JALR: (rVal1 + imm) & ~1; // "& -1" clears the bottom bit.
        default: pc + 4;
    endcase;
    Word addr = 0; // TODO Replace 0 with the correct expression
    return ExecInst{iType: dInst.iType, dst: dInst.dst, data: data,
        addr: addr, nextPc: nextPc};
endfunction

```

## Problem 5. ★

Add a branch if greater-than (BGT) instruction to the provided RISC-V processor. The instruction encoding should match other branch instructions, but have funct3 = 3'b010.

$B_{gt} \#(3) - fnGT = 3'6010$

### Decode

```
function DecodedInst decode(Bit#(32) inst);
    let opcode = inst[6:0];
    let funct3 = inst[14:12];
    let funct7 = inst[31:25];
    let dst    = inst[11:7];
    let src1   = inst[19:15];
    let src2   = inst[24:20];

    Maybe#(RIdx) validDst = Valid(dst);
    Maybe#(RIdx) dDst = Invalid; // default value
    RIdx dSrc = 5'b0;

    // DEFAULT VALUES - Use the following for your default values:
    // dst: dDst, src1: dSrc, src2: dSrc, imm: immD, BrFunc: Dbr, AluFunc: ?

    // We have provided a default value and done immB for you.
    Word immD32 = signExtend(1'b0); // default value
    Bit#(12) immB = { inst[31], inst[7], inst[30:25], inst[11:8] };
    Word immB32 = signExtend({immB, 1'b0});
    Bit#(20) immU = 0; // TODO
    Word immU32 = 0; // TODO
    Bit#(12) immI = 0; // TODO
    Word immI32 = 0; // TODO
    Bit#(20) immJ = 0; // TODO
    Word immJ32 = 0; // TODO
    Bit#(12) immS = 0; // TODO
    Word immS32 = 0; // TODO

    DecodedInst dInst = unpack(0);
    dInst.iType = Unsupported; // unsupported by default

    case (opcode)
        opAuipc: begin
            dInst = DecodedInst {
                iType: AUIPC,
                dst: validDst,
                src1: dSrc,
                src2: dSrc,
                imm: immU32,
                brFunc: Dbr,
                aluFunc: ?
            };
        end
        opLui: // TODO
    end

```

**Decode (continued)**

```

opOpImm: begin
    dInst.iType = OPIMM;
    dInst.src1 = src1;
    dInst.imm = immI32;
    dInst.dst = validDst;

    case (funct3)
        fnAND : dInst.aluFunc = And; // Decode ANDI instructions
        fnOR  : dInst.iType = Unsupported; // TODO
        fnXOR : dInst.iType = Unsupported; // TODO
        fnADD : dInst.iType = Unsupported; // TODO
        fnSLT : dInst.iType = Unsupported; // TODO
        fnSLTU: dInst.iType = Unsupported; // TODO
        fnSLL : case (funct7)
            7'b0000000: dInst.aluFunc = Sll;
            // Otherwise we must say the instruction is invalid:
            default: dInst.iType = Unsupported;
        endcase
        fnSR : // TODO
            dInst.iType = Unsupported;
            default: dInst.iType = Unsupported;
    endcase
end
opOp: // TODO
opBranch: // TODO
opJal: // TODO
opLoad: // TODO
opStore: // TODO
opJalr: // TODO
endcase
return dInst;
endfunction

```

### opBranch:

*(case(funct3):  
 $fnGT: dInst = DecodedInst$   
 $\{ dst: Invalid, src1:src2, imm:immB,$   
 $brFunc: Gt, aluFunc: ?,$   
 $iType: BRANCH \}$*

### Branch ALU (Execute.ms)

```
function Bool aluBr(Word a, Word b, BrFunc brFunc);
    Bool res = case (brFunc)
        Eq:      (a == b);
        Neq:     (a != b);
        Lt:      signedLT(a, b);
        Ltu:     (a < b);
        Ge:      signedGE(a, b);
        Geu:     (a >= b);
        default: False;
    endcase;
    return res;
endfunction

```

*Gt : signedGT(a,b);*

```
function Bool signedGT(Word a, Word b);
    Int#(32) aInt = unpack(a);
    Int#(32) bInt = unpack(b);
    return aInt > bInt;
endfunction

```

*unpack  
cast word into 32-bit int*

### Problem 6.

Assume that `aluBr` has been replaced with the new branch ALU function, `newAluBr`, shown below. This new branch ALU is controlled by two control signals: `newBrFunc` and `negate`. When the result of this function is true, the next PC is going to be computed as `pc + imm`.

```
typedef enum {Eq, Lt, Ltu} NewBrFunc;

function Bool newAluBr(Word a, Word b, NewBrFunc newBrFunc, Bool negate);
    Bool res = case (newBrFunc)
        Eq:      (a == b);
        Lt:      signedLT(a, b);
        Ltu:     (a < b);
    endcase;
    return negate ? !res : res;
endfunction
```

- A) Fill in the decoding table below to specify what the control signals should be for each `funct3`. Write an “X” in the table for entries that don’t matter. (Use the ISA [reference card](#) from the course website.)

| funct3 | newBrFunc | negate |
|--------|-----------|--------|
| 3'b000 |           |        |
| 3'b001 |           |        |
| 3'b010 |           |        |
| 3'b011 |           |        |
| 3'b100 |           |        |
| 3'b101 |           |        |
| 3'b110 |           |        |
| 3'b111 |           |        |

### **Problem 7. (Quiz Problem 6 From Spring 20)**

Giuseppe is writing a large RISC-V assembly program, and is tired of getting confused by how to properly maintain the stack pointer **sp**. He's heard that other ISAs have **push** and **pop** instructions, which handle both allocating/freeing the space for a word on the stack, and storing it from/loading it to a register. Giuseppe decides that he'd like to implement them in the RISC-V ISA. Their syntaxes are as follows:

And the following is a Python-like description of how each works:

push:

$$\begin{aligned} \text{reg[sp]} &= \text{reg[sp]} - 4 \\ \text{Mem}[\text{reg[sp]}] &= \text{reg[rs2]} \end{aligned}$$

pop:

$$\begin{aligned} \text{reg[rd]} &= \text{Mem}[\text{reg[sp]}] \\ \text{reg[sp]} &= \text{reg[sp]} + 4 \end{aligned}$$

(A) (4 points) Giuseppe hopes to be able to simply add some new signals as inputs to the selection muxes in the existing RISC-V processor diagram. For each instruction being added, can this be done with **the existing processor components as described in lecture and shown on the following page?** If not, describe the limitation that prevents it.

(Label: 6A push) Can push be implemented with existing components?

(Label: 6A pop) Can pop be implemented with existing components?

Giuseppe's friend Jeffrey tells him that the **push** and **pop** instructions can cause issues when pipelining his processor later on. Giuseppe, knowing nothing about processor pipelining, decides to take his word for it and find a different solution to his problem.

His goal now is to remove a large amount of the uses of stack, so that he doesn't have as many opportunities to become confused. The main limitation that causes him to use the stack so much is that he's running out of registers to hold temporary values. In order to overcome this, he wants to combine multiple steps into a single instruction. In particular, he wants to combine the **ANDing** of two registers and **XORing** the result with an immediate into a single instruction. All immediates he uses are 10 bits or less, so he comes up with the following instruction syntax, definition, and encoding:

`andxori rd, rs1, rs2, imm`

`andxori:`

$$\text{reg}[rd] = ((\text{reg}[rs1] \& \text{reg}[rs2]) \wedge \text{signExtend}(imm))$$

| 31...25  | 24...20 | 19...15 | 14...12  | 11...7 | 6...0   |
|----------|---------|---------|----------|--------|---------|
| imm[9:3] | rs2     | rs1     | imm[2:0] | rd     | 0110100 |

(B) (2 points) Encode the following instructions as 32-bit binary words:

`andxori x7, x1, x16, 0x1BC`

**(Label: 6B) Encoding in hexadecimal 0x: \_\_\_\_\_**

In the diagram below, Giuseppe has decoded and sign extended the immediate used for the `andxori` instruction, labelling this signal `immA`. He also added an additional dedicated XOR module used in the instruction's computation. The output of this new XOR module is labelled `axRes`.

*(Logic for computing PC for branches and jumps omitted)*



(C) (3 points) For each of the following signals, does the mux being controlled by that signal need an extra input to accommodate the new instruction? If so, indicate the name of **the signal that needs to be added as an input to the mux**. If not, indicate which existing value of **the mux control signal** is required to make the instruction work properly.

(Label: 6C\_BSEL\_1) BSEL:      Needs new input?      YES      NO

(Label: 6C\_BSEL\_2) New input/Existing control signal: \_\_\_\_\_

(Label: 6C\_WDSEL\_1) WDSEL:      Needs new input?      YES      NO

(Label: 6C\_WDSEL\_2) New input/Existing control signal: \_\_\_\_\_

(Label: 6C\_WERFSEL\_1) WERFSEL:      Needs new input?      YES      NO

(Label: 6C\_WERFSEL\_2) New input/Existing control signal: \_\_\_\_\_

(D) (3 points) Additionally, decide for each of the following control signals what their values should be when executing the **andxori** instruction. If the value of the signal doesn't matter, then put **N/A**. The possible values for each signal are provided below.

**AluFunc:** Add, Sub, And, Or, Xor, Slt, Sltu, Sll, Srl, Sra

**BrFunc:** Eq, Neq, Lt, Ltu, Ge, Geu

**MWR:** Read, Write

(Label: 6D\_alufunc) AluFunc: \_\_\_\_\_

(Label: 6D\_brfunc) BrFunc: \_\_\_\_\_

(Label: 6D\_mwr) MWR: \_\_\_\_\_

