-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathumipsasm.pl
410 lines (386 loc) · 13.7 KB
/
umipsasm.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
#!/usr/bin/perl -W
use Getopt::Std;
# CS 0447 Spring 2019
$version = "v0.9.3 03/20/19";
#
# wilkie
# modified: 03/20/19, 2:30pm v0.9.3 - renumbered opcode assignments
# Luis Oliveira
# modified: 09/10/18, 2:50pm v0.9.2 - renumbered opcode assignments
# Bruce Childers
# modified: 03/21/17, 9:22pm v0.9.1 - added sll, srl instructions
# modified: 04/11/16, 7:57pm v0.9c - fixed label bug due to unordered hashes
# modified: 03/17/16, 1:47pm v0.9
# modified: 03/04/15, 2:24pm v0.8
# modified: 11/06/14, 5:40pm v0.7
# modified: 03/29/14, 3:11pm v0.6
# modified: 11/09/12, 11:02pm v0.5
# modified: 04/06/12, 12:23pm v0.4.1
# modified: 04/01/12, 3:34pm v0.4.0
# modified: 12/01/11, 10:12pm
# modified: 11/20/11, 4:44pm
# modified: 11/16/11, 9:47pm
# modified: 03/26/11, 10:57pm
# modified: 11/17/10, 10:02pm
# original version: 11/27/09, 8:42pm
#
# 03/17/16
# * added div and mul instructions
# * added mlo and mhi instructions
# 03/04/15
# * renumbered opcode assignments
# 11/06/14
# * reorganized opcode assignments
# 03/29/14
# * reorganized opcode assignments
# 11/09/12
# * improved/changed opcode assignments
# 04/06/12
# * fixed problem where any I-format instruction wasn't updated to
# shift the immediate left by 1. the subop was moved to the lsb
# to make it consistent with the R format, so all instructions that
# used I-format needed their immediate left shifted by 1.
# 04/01/12
# * updated new opcodes / subopcode
# 12/01/11
# * fixed typo in opcode for ADDUI
# 11/20/11
# * fixed typos in the opcodes
# 11/16/11
# * changed to conform to improved opcode encoding scheme
# * added improved branch support with bx instruction (branch not equal)
# * added improved jump support with j instruction (jump absolute)
# 03/26/11 - changed for new instructions, new registers
# * added two-operand instructions
# * added eight registers (0-7)
# * added bp, bn, bz instructions
# * changed put format
# * deleted get instruction
# * added support for .text and .data directives
# * now supports a data section
# * added hex numbers
# * added pseudo-instructions: la, li, clr
# 11/17/10 - added new instructions
# * get $rt,port instruction
# * put $rt,port instruction
# 11/27/09 - original version
#
# This is a rudimentary assembler for the µMIPS project. It will accept a very simple
# assembly language syntax, resembling MIPS syntax. see the umipsasm.pdf manual.
#
# A few rules:
# 1) labels start with a letter and always end with :
# 2) ; or # is a comment
# 3) registers are $r0-$r7 ($t0-$t7 are accepted as synonyms) - the letter is required (unlike in MIPS where $0 is OK)
# 4) labels are case insensitive (input is entirely converted to uppercase below)
# 5) the output in default mode (don't specify -v) can be read by Logisim ROM/RAM modules
# 6) the data width for the instruction ROM must be 16 bits
# 7) hexadecimal constants (0x...) are accepted only directly after a ',' or a label's ':'
#
# This is an unsupported program! It's provided for your convenience. Feel free to
# modify it as necessary to fix bugs, add instructions, etc. If you find any problems, or
# make useful changes, please let me know!
#
# Command-line handling.
# With no arguments at all, print the usage summary and stop.  The flag list
# shown in the usage line mirrors the option string given to getopts() below.
if (@ARGV == 0) {
    print "usage: asm.pl [-vpdlx] filename\n";
    print "µMIPS assembler $version\n";
    print " -d output data section for loading into RAM\n";
    print " -v display verbose information\n";
    print " -p don't use pseudo-instructions\n";
    print " -x print debug information\n";
    print " -l dump labels and their addresses\n";
    print " warning: the v2 assembly language is incompatible with past versions (v0, v1)\n";
    exit;
}
# Parse the single-letter switches into %options; getopts() removes them from
# @ARGV, leaving the file name(s) for the <> read loop of pass 1.
getopts("vpdlx", \%options);
# Register name -> register number, for R0 through R7.
%regs = map { ("R$_" => $_) } 0 .. 7;
# Synonym table: every accepted register spelling -> its canonical R-name.
# T0-T7 are aliases of R0-R7, so source written with $t registers (as in MARS)
# assembles unchanged.
%rsyn = map { ("R$_" => "R$_", "T$_" => "R$_") } 0 .. 7;
# Opcode mnemonic -> base instruction word.  Entries are listed in order of
# the top four bits of the word; where two mnemonics share those bits, the
# low bit tells them apart.
# NOTE(review): the pass-2 patterns also name SLL, SRL, SET, SETR, MLO and MHI,
# none of which has an entry here - confirm their encodings and add them.
%mnemonic = (
    ADD   => 0x0000,   SUB   => 0x0001,
    AND   => 0x1000,   NOR   => 0x1001,
    DIV   => 0x2000,   MUL   => 0x2001,
    SRLV  => 0x3000,   SLLV  => 0x3001,
    LW    => 0x4000,   SW    => 0x4001,
    JR    => 0x5000,
    HALT  => 0x6000,
    PUT   => 0x7000,
    ADDUI => 0x8000,   ADDI  => 0x8001,
    LI    => 0x9000,
    BP    => 0xA000,   BN    => 0xA001,
    BX    => 0xB000,   BZ    => 0xB001,
    JAL   => 0xC000,
    J     => 0xD000,
);
emitheader();
# pass 1: read the input, record label addresses, and queue every text-section
# line in @source for pass 2
$addr  = 0;    # address of the next instruction word in the text section
$daddr = 0;    # address of the next word in the data section
$sect  = 0;    # 0 is text section (default), 1 is data section
while (<>) {
    $_ = uc($_);    # convert to all uppercase (the assembler is case insensitive)
    # Convert hex numbers (starting with 0X) that follow a ':' or ',' to
    # decimal.  The replacement is evaluated as Perl code (/e): it keeps the
    # delimiter and appends a space plus the decimal value.  Whitespace after
    # the number is deliberately not consumed, so the line keeps its newline
    # and any trailing comment keeps its separator.
    s/([:,])\s*0X([0-9A-F]+)/$1 . " " . hex($2)/eg;
    if (defined $options{x}) {
        print "DEBUG: $_";
    }
    # Section directives must be the first thing on the line, so a comment
    # that merely mentions ".data" or ".text" cannot switch sections.
    if (/^\s*\.DATA\b/) {
        $sect = 1;
    }
    elsif (/^\s*\.TEXT\b/) {
        $sect = 0;
    }
    # data section: each declaration is "label: value" and occupies one word
    elsif ($sect == 1) {
        # anchored at the start of the line so "word: number" inside a comment
        # does not allocate data
        if (/^\s*(\w+):\s+(-?\d+)/) {
            $dlabel{$1}      = $daddr;           # assign the label an address
            $dvalue{$daddr}  = ($2 & 0xffff);    # keep the low 16 bits of the value
            $daddr++;                            # increment to next address
        }
    }
    # text section: remember the raw line, then account for label and address
    else {
        push(@source, $_);
        # A label is only recognised at the start of the line; matching it
        # anywhere would let "word:" inside a comment define or overwrite a
        # label.  The label is removed so the opcode test below sees the rest.
        if (s/^\s*(\w+)://) {
            $label{$1} = $addr;
        }
        # be careful: this matches any input string for an opcode, but it should cause an error in pass 2
        if (/^\s*[A-Z]+/) {
            # FIXME! these pseudos should really be put into a table, with the address increment amount
            # OR and MOV expand to two real instructions unless -p is given
            # NOTE(review): with -p, pass 2 emits nothing for a pseudo yet one
            # word is still counted here, so later label addresses are off.
            if (/^\s*(?:OR|MOV)/ && !defined $options{p}) {
                $addr++;
            }
            $addr++;
        }
    }
}
# output the data section (for loading into RAM), if requested
if (defined $options{d}) {
    # Pass 1 stored one value per data word in %dvalue, keyed by address, and
    # left $daddr at the number of words allocated.  Walking the addresses
    # directly prints every word in address order.  Counting label names
    # instead would come up short (and hit an undefined slot) whenever a
    # label name is declared more than once.
    for my $word_addr (0 .. $daddr - 1) {
        printf "%04X\n", $dvalue{$word_addr};
    }
    # just print the data section and exit when -d is used
    exit;
}
# -l: dump the symbol table (code labels, then data labels) and stop
if (defined $options{l}) {
    print "** Code labels:\n";
    for my $name (sort keys %label) {
        printf "%-8s: %04X\n", $name, $label{$name};
    }
    print "** Data labels:\n";
    for my $name (sort keys %dlabel) {
        # each data label shows its address followed by the word stored there
        my $where = $dlabel{$name};
        printf "%-8s: %04X %04X\n", $name, $where, $dvalue{$where};
    }
    # nothing is assembled when -l is used
    exit;
}
# pass 2: emit encoded instructions and resolved labels
# a bunch of ugly regexps are used to match individual instructions - they can match stuff that isn't
# actually legal assembly, so be careful.
#
# Instruction word layout as used by the encodings below: the opcode word from
# %mnemonic, plus the first register shifted left by 9, plus either a second
# register shifted left by 6 or an 8-bit immediate/address shifted left by 1.
$addr = 0;
my $REG = qr/\$([RT][0-7])/;    # one register operand; captures its name, e.g. "R3" or "T3"

# Return the opcode word for mnemonic $op.  A mnemonic that a pattern accepts
# but %mnemonic does not define is reported and assembly stops; adding the
# undefined value would silently encode the instruction with opcode 0.
sub _opcode_of {
    my ($op, $src) = @_;
    if (!defined $mnemonic{$op}) {
        print "ERROR: no opcode defined for $op: $src\n";
        exit;
    }
    return $mnemonic{$op};
}

# Return the register number for register name $name (R- or T-form),
# shifted left by $shift bits.
sub _reg_field {
    my ($name, $shift) = @_;
    return $regs{$rsyn{$name}} << $shift;
}

# Return the low 8 bits of $value, shifted left by one bit.
sub _imm_field {
    my ($value) = @_;
    return ($value & 0xff) << 1;
}

foreach (@source) {
    $line = $_;
    if (defined $options{x}) {
        print "DEBUG2: $line\n";
    }
    chomp $line;
    # strip labels
    s/\w+://;
    # put $reg,port
    if (/^\s*PUT\s+$REG\s*,\s*(-?\d+)/) {
        emit($addr++, _opcode_of('PUT', $line) + _reg_field($1, 9) + _imm_field($2), $line);
    }
    # put $reg - the port defaults to 0
    elsif (/^\s*PUT\s+$REG\s*/) {
        emit($addr++, _opcode_of('PUT', $line) + _reg_field($1, 9) + _imm_field(0), $line);
    }
    # two-register instructions
    elsif (/^\s*(ADD|SUB|AND|NOR|SLLV|SRLV|DIV|MUL)\s+$REG\s*,\s*$REG\s*/) {
        emit($addr++, _opcode_of($1, $line) + _reg_field($2, 9) + _reg_field($3, 6), $line);
    }
    # shifts by a constant amount (only the low 4 bits of the amount are kept)
    elsif (/^\s*(SLL|SRL)\s+$REG\s*,\s*(-?\d+)/) {
        emit($addr++, _opcode_of($1, $line) + _reg_field($2, 9) + (($3 & 0xf) << 1), $line);
    }
    elsif (/^\s*HALT/) {
        emit($addr++, _opcode_of('HALT', $line), $line);
    }
    # immediate instructions with a numeric operand
    elsif (/^\s*(ADDI|ADDUI|SET|SETR|LI)\s+$REG\s*,\s*(-?\d+)/) {
        emit($addr++, _opcode_of($1, $line) + _reg_field($2, 9) + _imm_field($3), $line);
    }
    # immediate instructions with a label operand is loading an address specified by a data label
    elsif (/^\s*(ADDI|ADDUI|SET|SETR)\s+$REG\s*,\s*(\w+)/) {
        if (!defined $dlabel{$3}) {
            print "Undefined data address label $3\n";
            exit;
        }
        emit($addr++, _opcode_of($1, $line) + _reg_field($2, 9) + _imm_field($dlabel{$3}), $line);
    }
    # branch instructions: bn $reg,label
    elsif (/^\s*(BN|BP|BZ|BX)\s+$REG\s*,\s*(\w+)/) {
        if (!defined $label{$3}) {
            # report the label ($3), not the register ($2)
            print "Undefined label $3\n";
            exit;
        }
        emit($addr++, _opcode_of($1, $line) + _reg_field($2, 9) + _imm_field($label{$3}), $line);
    }
    # NOT pseudo operation: not $reg  (nor $reg,$reg)
    elsif (/^\s*NOT\s+$REG\s*/) {
        if (!defined $options{p}) {
            emit($addr++, _opcode_of('NOR', $line) + _reg_field($1, 9) + _reg_field($1, 6), $line);
        }
        else {
            print "WARNING: Ooops! No pseudos are allowed but I found NOT.\n";
        }
    }
    # OR pseudo operation: or $reg,$reg  (nor, then invert the result)
    elsif (/^\s*OR\s+$REG\s*,\s*$REG\s*/) {
        if (!defined $options{p}) {
            emit($addr++, _opcode_of('NOR', $line) + _reg_field($1, 9) + _reg_field($2, 6), $line);
            emit($addr++, _opcode_of('NOR', $line) + _reg_field($1, 9) + _reg_field($1, 6), "");
        }
        else {
            print "WARNING: Ooops! No pseudos are allowed but I found OR.\n";
        }
    }
    # clear register pseudo (set to 0): clr $reg  (sub $reg,$reg)
    elsif (/^\s*CLR\s+$REG\s*/) {
        if (!defined $options{p}) {
            emit($addr++, _opcode_of('SUB', $line) + _reg_field($1, 9) + _reg_field($1, 6), $line);
        }
        else {
            print "WARNING: Ooops! No pseudos are allowed but I found CLR.\n";
        }
    }
    # load address pseudo operation: la $reg,dlabel  (encoded as a single LI)
    elsif (/^\s*LA\s+$REG\s*,\s*(\w+)\s*/) {
        if (!defined $options{p}) {
            if (!defined $dlabel{$2}) {
                print "Undefined data address label $2\n";
                exit;
            }
            emit($addr++, _opcode_of('LI', $line) + _reg_field($1, 9) + _imm_field($dlabel{$2}), $line);
        }
        else {
            print "WARNING: Ooops! No pseudos are allowed but I found LA.\n";
        }
    }
    # MOV pseudo operation: mov $reg0,$reg1 - reg1 is copied into reg0
    elsif (/^\s*MOV\s+$REG\s*,\s*$REG\s*/) {
        if (!defined $options{p}) {
            # clear the destination register
            emit($addr++, _opcode_of('SUB', $line) + _reg_field($1, 9) + _reg_field($1, 6), $line);
            # move over the source register by adding it
            emit($addr++, _opcode_of('ADD', $line) + _reg_field($1, 9) + _reg_field($2, 6), "");
        }
        else {
            print "WARNING: Ooops! No pseudos are allowed but I found MOV.\n";
        }
    }
    elsif (/^\s*(SW|LW)\s+$REG\s*,\s*$REG\s*/) {
        emit($addr++, _opcode_of($1, $line) + _reg_field($2, 9) + _reg_field($3, 6), $line);
    }
    elsif (/^\s*JAL\s+$REG\s*,\s*(\w+)/) {
        if (!defined $label{$2}) {
            print "Undefined label $2\n";
            exit;
        }
        emit($addr++, _opcode_of('JAL', $line) + _reg_field($1, 9) + _imm_field($label{$2}), $line);
    }
    elsif (/^\s*JR\s+$REG/) {
        emit($addr++, _opcode_of('JR', $line) + _reg_field($1, 9), $line);
    }
    elsif (/^\s*(MLO|MHI)\s+$REG/) {
        emit($addr++, _opcode_of($1, $line) + _reg_field($2, 9), $line);
    }
    elsif (/^\s*J\s+(\w+)/) {
        if (!defined $label{$1}) {
            print "Undefined label $1\n";
            exit;
        }
        emit($addr++, _opcode_of('J', $line) + _imm_field($label{$1}), $line);
    }
    elsif (/^\s*[\#;]/ || /^\s*$/) {
        # yummy: a comment or blank line has been eaten!
        # (a comment needs no space after its '#' or ';')
    }
    elsif (/^\s*[A-Z]/) {
        # Pass 1 counted an address for this line because it starts with a
        # letter, but no pattern above recognised it.  Skipping it silently
        # would shift every later instruction relative to its label address.
        print "ERROR: $line\n";
        exit;
    }
    elsif (/^\s+/) {
        # any other indented text is ignored, as before
    }
    else {
        print "ERROR: $line\n";
        exit;
    }
}
emittrailer();
exit;
# subroutine to output the instruction
#   arg 0: address of the instruction word
#   arg 1: encoded instruction word
#   arg 2: source text of the instruction (printed only in verbose mode)
sub emit {
    my ($where, $word, $text) = @_;
    if (defined $options{v}) {
        # verbose mode shows address, encoded instruction, and instruction string.
        # The source text is passed as a %s argument rather than spliced into
        # the format, so a '%' in the source line is printed literally instead
        # of being interpreted as a conversion.
        printf "%04X:%04X\t%s\n", $where, $word, $text;
    }
    else {
        # normal mode just shows the encoded instruction for Logisim's ROM file
        printf "%04X\n", $word;
    }
}
# output the header
# With -v or -l the output is a listing, so a one-line banner is printed.
# Otherwise the output is a Logisim memory image: the "v2.0 raw" signature
# line followed by '#' comment lines explaining how to load the file.
sub emitheader {
    my @banner;
    if (defined $options{v} || defined $options{l}) {
        @banner = ("µMIPS assembler $version\n");
    }
    else {
        @banner = (
            "v2.0 raw\n",
            "\# µMIPS ISA version $version\n",
            "\# to load this file into Logisim:\n",
            "\# 1) save the output from the assembler to a file\n",
            "\# 2) use the poke tool in Logisim and control-click the ROM/RAM component\n",
            "\# 3) select Load Image menu option\n",
            "\# 4) load the saved file\n",
        );
    }
    print @banner;
}
# output trailer at end of file
# Nothing is written: this is a placeholder called once after pass 2.
sub emittrailer {
    return;
}