Skip to content

I guess Meh About Sums it Up – Branch Optimization IV

February 14, 2018

The linear version of the branch optimization works pretty well now. I ran it on the Dhrystone code with a python profiler where it took .1 sec to shorten 100 of 144 branches in 8000 lines of code. On the same code the copt peephole optimizer did 119 optimizations on the 3000 lines of macro code it read. Probably safe to say that the copt changes saved about five or ten times as much as the branch optimizer since the peephole is usually taking code out at the macro level so several machine instructions a go and the optimized branches really only take out the equivalent of 1/2 instruction. If I could do it in a single pass or as part of copt fine but on its own maybe not worth it.

I ran a quick comparison of the linear version with the recursion version and there’s just no contest. The recursion version ran much much longer. I also accidentally overwrote the recursive version which i had not versioned so that’s that!

#18-02-10 linear two pass scan - no recursion
#18-02-13 trying to include aligns with jumps.
import sys
sys.path.append('/lcc42/examples/branchanal/')
from opdefsNW import *
from opfuncsNW import *
asminfile = open(sys.argv[1],'r')
asmdata = asminfile.read().expandtabs()
asmlines = asmdata.split('\n')
print len(asmlines), 'lines read'
jumpcount=0;aligncount=0
repcount=0
progsize=0
i=0
tln=0;pln=0;mln=0 #temp label numbers for $$,+ -
labeldefs={}
labelrefs=[]
def ireplace(old, new, text): #    Replace case insensitive
    index_l = text.lower().index(old.lower())
    return text[:index_l] + new + text[index_l + len(old):] 
def gotlabel(token):
	global tln,pln,mln
	label=token.split(':')[0]
	if label.startswith("$$"):
		label+=str(tln)
	elif label.startswith("+"): #check for temporary label
		label+=str(pln)
		pln+=1
	elif label.startswith("-"): #check for temporary label
		mln+=1
		label+=str(mln)
	else:
		tln+=1
	labeldefs[label]=progsize

def fmtlabel(label):
	global tln,pln,mln
	if label.startswith("$$"):
		label+=str(tln)
	elif label.startswith("+"): #check for temporary label
		label+=str(pln)
	elif label.startswith("-"): #check for temporary label
		label+=str(mln)
	return label

for line in asmlines:
	aline=line.split(";")[0] #get rid of any comment portion
	tokens=aline.split();
	if tokens:
		#print "%d %X %s" % (i+1,progsize,aline)
		if (not aline.startswith(' ')) or tokens[0].endswith(':'): #if it has a label
			gotlabel(tokens[0])
			tokens=tokens[1:]			#get rid of it
		if tokens:
			if tokens[0].lower() in jumpdefs:
				jumpcount+=1
				labelrefs.append([i,progsize,tokens[0].lower(),fmtlabel(tokens[1].split(",")[0])])
			elif tokens[0].lower()=="align":
				aligncount+=1
				labelrefs.append([i,progsize,tokens[0].lower(),tokens[1].split(",")[0],process(tokens,progsize)])
				#print "align ",i,progsize,tokens[0].lower(),tokens[1].split(",")[0],process(tokens,progsize)
			progsize+=process(tokens,progsize)

	i+=1;
print "pass 1 completed. ",len(labeldefs)," labels found. ",len(labelrefs)," jumps found."
#print labelrefs,labeldefs
adjb=0;adjl=0
repacount=0
i=0
for ref in labelrefs: # line index, location, branch op(or align), label referenced(or alignment)
	line=ref[0]; loc=ref[1]; brop=ref[2]; label=ref[3]
	adjamt=0
	if brop=="align":
		repacount+=1
		newsize=process([brop,label],loc)
		adjamt=newsize-ref[4]
		labelrefs[i][4]=newsize
		#print "*A*%4d %4x %s" %(line+1,loc,brop), label, ref[4],newsize, adjamt
	elif (loc+1)//256==labeldefs[label]//256:
		asmlines[line]=ireplace(brop,jumpreps[brop],asmlines[line])
		#print "%4d %4x %s %4x" %(line+1,loc,asmlines[line],labeldefs[label])
		repcount+=1
		adjamt=-1
	if not adjamt==0:
		for adjref in labelrefs: # line index, location, branch op, label referenced
			if adjref[0]>line: # for jumps further down the way
				if adjref[2]=="align":
					pass #print "A%d %x "%(adjref[0],adjref[1]),adjref[2],adjref[3],adjref[4]
				adjref[1]+=adjamt #get adjusted
				adjb+=1
		for k, labloc in labeldefs.iteritems():
			if labloc>loc:
				labeldefs[k] +=adjamt
				adjl+=1
	i+=1
#print labeldefs
#for k,labloc in labeldefs.iteritems():
#	print "%4x %s" % (labloc,k)
print repcount,"+",repacount," fixup cycles ",adjb," branch fixups ",adjl," label fixups"
asmoutfile=open(sys.argv[1].split('.')[0]+".basm",'w'); asmoutfile.truncate()
i=0
for line in asmlines:
	asmoutfile.write(line+'\n')
	i+=1
print i, 'lines written'
print "%d long jumps found, %d shortened" % (jumpcount,repcount)
##################opdefsNW follows
opsizes={
	'or':1,'ori':2,'xor':1,'xri':2,
	'and':1,'ani':2,
	'out':1,'inp':1,
	'sep':1,'ldi':2,'plo':1,'phi':1,'glo':1,'ghi':1,
	'sm':1,'smb':1,'smbi':2,
	'sd':1,'sdb':1,'sdi':2,'sdbi':2,
	'skp':1,'lskp':1,'lsnf':1,'lsdf':1,'lsz':1,'lsnz':1,
	'sex':1,'lda':1,'ret':1,'nop':1,'dec':1,'stxd':1,
	'shr':1,'shrc':1,'shl':1,'shlc':1,
	'str':1,'ldn':1,
	'add':1,'adi':2,'adc':1,'adci':2,'str':1,'smi':2,'inc':1,
	'dis':1,'sav':1,
	'bz':2,'bnz':2,'br':2,'b3':2,'bn3':2,'bnf':2,'bdf':2,
        'cpy2': 4, 'cpy1': 2, 'cpy4': 8,
        'zext': 3,'sext': 9,'zext4':4,'sext4':11,
        'negi2':9,'negi4':32,
        'alu2':12,'alu2i':8,
        'alu4':22,'alu4i':16,
        'ldad': 6,
        'lda2': -8,
        'shl2i': -6,'shri2i': -8,'shru2i': -6,
        'shl4':12,'shri4':14,
        'shl4i': -12,'shri4i': -14,
        'st1':-10,'st2':-13,'str1':2,
        'ld2':-12,'ld1':-10,
        'ldn1':2,'ldn2':4,
        'jzu2':8,
        'jnzu2':8,'jnzu1':4,'sjnzu2':6,'sjnzu1':3, #the sj codes are short branch variants
        'jeqi2':18,'jcu2':13,'jneu2': 18,
        'jci2': 20,'jci4': 28,'jcu4': 28,
        'jcu2i':9,
        'jni2i':17,'jnu2i':9,
        'jneu2i': 12,
        'jneu4':39,
        'jequ2i':12,'jci2i':17,
        'jcf4':69,
        'ld2z':4,
        'ldi4':12,'st4':-19,'ld4':-16, 
        'incm': -1,'decm': -1,
        'popr': 5, 'pushr': 4,
        'popf': 5, 'popl': 4,
        'lbr': 3,'lbnz':3,'lbz':3,'lbnf':3,'lbdf':3,
        'equ': 0, 'db': -1,  'dw':2,'dd':4,
        'align':-1, #force alignment
        'cretn': 1,'ccall': 3,
         'seq': 1,   'req': 1,
        'listing': 0,   'include': -3,
        'release': -1,'reserve': -1,
        'jumpv':10,
        'relaxed':0, 'macexp': 0,
        'ldx':1,'ldxa':1,'irx':1,
        'org':-1}
        
jumpdefs={
	'lbr': 3,'lbnz':3,'lbz':3,'lbdf':3,'lbnf':3
	}
jumpreps={
	'lbr': 'br','lbnz':'bnz','lbz':'bz','lbdf':'bdf','lbnf':'bnf'	
	}

#######################opfuncsNW follows
from opdefsNW import *
def opsizer(opsize,tokens,currentpc):
    op=tokens[0].lower()
    operands=tokens[1].split(",")
    #print "op=",op," operands=",operands
    if op=='reserve':
        n=int(operands[0])
        if (n<9):
              return n
        else:
              return 10
    elif op=='release':
        n=int(operands[0])
        if (n",operands
        if len(tokens)>=3 and tokens[2]=='dup':
              return int(tokens[1]) #tokens[1] because dup is separated from the count by a space
        else:
	      #print "db length is ",len(operands)
              return len(operands)
    elif op=='include':
        if operands[0].lower().startswith('lcc1802prolo'):
            return 3
        else:
            if operands[0].startswith('lcc1802epilo'):
                epiloglocation=currentpc
            return 0
    elif op in ['ld2','ld1','lda2','st2','st1','ld4','st4']: #ops that can be offset or direct storage refs
    	 #print operands
         if operands[1].lower()=="'o'": #offset/index
            return opsize*-1 #returns the whole value
         else:
            return (opsize*-1)-2 #direct op is 2 smaller
    elif op in ['shl2i','shri2i','shlu2i','shri2i','shru2i','shl4i','shri4i']:
         return int(operands[1])*-1*opsize
    elif op in ['incm','decm']:
         return int(operands[1])*-1*opsize
    elif op=='align':
        boundary=int(operands[0])
        if (currentpc%boundary)==0:
            return 0
        else:
            return boundary-(currentpc%boundary)
    elif op=='org':
    	print "I Hope this ORG is not harmful! ",operands[0]
    	return 0
    else:
        print '**************opsizer oops',tokens,opsize
        x = raw_input("Press Enter to continue")
       
            
        
    return 999
    
def process(tokens,currentpc):
    global opdefs
    try:
        #print 'processing ',tokens,"**",tokens[0],"**",opsizes[tokens[0]],currentpc
        opsize=opsizes[tokens[0].lower()]
    except:
        print 'process oops', tokens
        opsize=0
        print tokens[0],opsizes
        x = raw_input("Press Enter to continue")

    if opsize>=0:
        thisopsize=opsize
    else:
        thisopsize=opsizer(opsize,tokens,currentpc)
    #print tokens[0],opsize,thisopsize
    return thisopsize

Advertisements

From → Uncategorized

Leave a Comment

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: