Skip to content

Speeding up the Bagels Server II

July 19, 2014

I thought about rewriting the whole wiz_WriteN routine in assembly, but it’s trickier than you’d think and seems fraught with possible errors, both at the start and in later maintenance. I started by replacing just the enable/disable sequence and the call to spisendN with inline assembly as follows:

// wiz_WriteNf: send N bytes from data, one six-byte output sequence per byte.
//   offaddr - running offset; masked with TX_BUF_MASK and added to TXBUFADDR
//             to form the address placed in the sequence for each byte
//   data    - source bytes, read one per iteration
//   N       - number of bytes to send; nothing is sent when N is 0
// The select, deselect and opcode fields of the SPINsequence work area are
// set once before the loop; realaddr and data are refreshed for every byte.
// The inline assembly then clocks the work area out: one OUT 4, four OUT 6
// and a final OUT 4, with X temporarily pointed at the work area.
void wiz_WriteNf(unsigned int offaddr,unsigned char *data, unsigned int N)
{
  //printf("\nwiz_WriteN(%x,%s,%d) ",offaddr,data,N);
  SPINsequence.select=PIN4&selectmask;
  SPINsequence.deselect=PIN4&deselectmask;
  SPINsequence.opcode=WIZNET_WRITE_OPCODE; //prep for sequence of writes
  while(N>0){//for all the bytes to be transferred
  	SPINsequence.realaddr=TXBUFADDR+(offaddr&TX_BUF_MASK);
  	SPINsequence.data=*data;
  	asm("	ldaD memaddr,_SPINsequence\n" //address the work area
  		"	sex memaddr\n"	//going to do output direct from there
  		"	out 4\n"		//select the shield
  		"	out 6\n	out 6\n	out 6\n	out 6\n" //output the payload
  		"	out 4\n"		//deselect the shield
  		"	sex 2\n");		//reset the X register

  	offaddr++;data++;N--;
  }
}

That only got me down to about 50 instructions per payload byte because, even though I moved instructions out of the loop and eliminated a call/return sequence, there are still some hefty compiled sequences inside the loop.

	SPINsequence.realaddr=TXBUFADDR+(offaddr&TX_BUF_MASK);
translates to almost thirty 1802 instructions:
	alu2I R11,R12,2047,ani,ani ;removed copy;BANDU2(reg,con)
	ldA2 R11,'O',R11,(16384); reg:addr
	st2 R11,'D',(_SPINsequence+2),0; ASGNU2(addr,reg)

If I hold my nose I can convert that to

// wiz_WriteNf: intermediate version that builds each output sequence by hand.
//   offaddr - running offset; the inline assembly reads it from R12
//   data    - source bytes; the inline assembly reads them through R13
//   N       - number of bytes to send
// X is pointed at the data slot of the SPINsequence work area (offset +4)
// once, before the loop. Each pass stores the data byte and the two address
// bytes backwards with STXD, steps back to the start of the work area and
// then clocks the six bytes out.
// NOTE: after the final OUT 4 memaddr is left beyond the deselect byte and
// this version never moves it back to the data slot, so only the first pass
// stores into the intended locations. The surrounding text says this version
// does not work as written.
// NOTE(review): depends on the compiler keeping offaddr in R12 and data in
// R13 - confirm against the generated listing.
void wiz_WriteNf(unsigned int offaddr,unsigned char *data, unsigned int N)
{
  //printf("\nwiz_WriteN(%x,%s,%d) ",offaddr,data,N);
  SPINsequence.select=PIN4&selectmask;
  SPINsequence.deselect=PIN4&deselectmask;
  SPINsequence.opcode=WIZNET_WRITE_OPCODE; //prep for sequence of writes
  asm("	ldaD memaddr,_SPINsequence+4\n"	//point to the data element in the work area
  	"	sex memaddr\n");	//prepare to do output from there
  while(N>0){//for all the bytes to be transferred
  	//SPINsequence.data=*data++;
  	asm("	lda	R13\n"	//pick up a byte of data
  		"	stxd\n");	//store it and back up to the address

  	//SPINsequence.realaddr=TXBUFADDR+(offaddr&TX_BUF_MASK);
  	asm("	glo R12\n"	//low byte of address doesn't change
  		"	stxd\n" //put it in place and back up
  		"	ghi R12\n"	//offaddr is in Reg 12
  		"	ani 2047>>8\n" //wrap around if needed
  		"	ori 16384>>8\n" //keep in the buffer area
  		"	stxd\n"	//now points to the write opcode
  		"	dec memaddr\n"); //back to the beginning of the work area

  	asm("	out 4\n"		//select the shield
  		"	out 6\n	out 6\n	out 6\n	out 6\n" //output the payload
  		"	out 4\n");		//deselect the shield
  	offaddr++;N--;
  }
  asm("	sex 2\n");		//reset the X register
}

That’s only about 25 instructions inside the loop. Now, will it work? No, of course not. But some debugging later, I ended up with

// wiz_WriteNf: debugged version that builds each output sequence by hand.
//   offaddr - running offset; the inline assembly reads it from R12
//   data    - source bytes; the inline assembly reads them through R13
//   N       - number of bytes to send; nothing is sent when N is 0
// X is pointed at the data slot of the SPINsequence work area (offset +4)
// before the loop and restored to register 2 afterwards. Each pass:
//   1. stores the data byte and the two address bytes backwards with STXD,
//   2. steps back once more to the first byte of the work area,
//   3. clocks out six bytes (OUT 4, four OUT 6, OUT 4),
//   4. backs memaddr up two bytes so it is on the data slot again.
// Only the high address byte is masked (2047>>8) and OR-ed with 16384>>8;
// the low byte is stored unchanged.
// NOTE(review): depends on the compiler keeping offaddr in R12 and data in
// R13, and on nothing else touching memaddr or X inside the loop - re-check
// the generated listing after any change to this function or the compiler.
void wiz_WriteNf(unsigned int offaddr,unsigned char *data, unsigned int N)
{
  //printf("\nwiz_WriteN(%x,%s,%d) ",offaddr,data,N);
  SPINsequence.select=PIN4&selectmask;
  SPINsequence.deselect=PIN4&deselectmask;
  SPINsequence.opcode=WIZNET_WRITE_OPCODE; //prep for sequence of writes
  asm("	ldaD memaddr,_SPINsequence+4\n"	//point to the data element in the work area
  	"	sex memaddr\n");	//prepare to do output from there
  while(N>0){//for all the bytes to be transferred
  	//SPINsequence.data=*data++;
  	asm("	lda	R13\n"	//pick up a byte of data
   		"	stxd\n");	//store it and back up to the address

  	//SPINsequence.realaddr=TXBUFADDR+(offaddr&TX_BUF_MASK);
  	asm("	glo R12\n"	//low byte of address doesn't change
  		"	stxd\n" //put it in place and back up
  		"	ghi R12\n"	//offaddr is in Reg 12
  		"	ani 2047>>8\n" //wrap around if needed
  		"	ori 16384>>8\n" //keep in the buffer area
  		"	stxd\n"	//now points to the write opcode
  		"	dec memaddr\n"); //back to the beginning of the work area

  	asm("	out 4\n"		//select the shield
  		"	out 6\n	out 6\n	out 6\n	out 6\n" //output the payload
  		"	out 4\n");		//deselect the shield
  	asm("	dec memaddr\n	dec memaddr\n");//point back to the data byte
  	offaddr++;N--;
  }
  asm("	sex 2\n");		//reset the X register
}

which compiled to

; Compiler output for the debugged wiz_WriteNf.
; Register use visible in this listing:
;   R7      - N: loaded from the stack, decremented and tested each pass,
;             saved on entry and restored on exit
;   R12     - offaddr: incremented each pass, read by the inline glo/ghi
;   R13     - read by the inline "lda R13"; presumably the data pointer
;   memaddr - pointer into the _SPINsequence work area, used as X in the loop
; Work-area offsets used below: +0 select, +1 opcode, +4 data, +5 deselect.
_wiz_WriteNf:		;framesize=4
	pushr R7
	ld2 R7,'O',sp,(8) ;reg:INDIRU2(addr)
;{
;  SPINsequence.select=PIN4&selectmask;
	ldaD R11,_SPINsequence; reg:acon
	ld1 R10,'D',(_PIN4),0
	zExt R10 ;CVUI2: widen unsigned char to signed int (zero extend)
	alu2I R10,R10,127,ani,ani
	;removed ?	cpy2 R10,R10
	str1 R10,R11; ASGNU1(indaddr,reg)		DH
;  SPINsequence.deselect=PIN4&deselectmask;
	ldaD R11,_SPINsequence+5; reg:acon
	ld1 R10,'D',(_PIN4),0
	zExt R10 ;CVUI2: widen unsigned char to signed int (zero extend)
	alu2I R10,R10,128,ani,ani
	;removed ?	cpy2 R10,R10
	str1 R10,R11; ASGNU1(indaddr,reg)		DH
;  SPINsequence.opcode=WIZNET_WRITE_OPCODE; //prep for sequence of writes
	ldaD R11,_SPINsequence+1; reg:acon
	str1I 240,R11; ASGNU1(indaddr,acon)	DH
;  asm("	ldaD memaddr,_SPINsequence+4\n"	//point to the data element in the work area
	ldaD memaddr,_SPINsequence+4
	sex memaddr
; the loop is entered at its test (L251), so N == 0 sends nothing
	lbr L251
L250:
;  while(N>0){//for all the bytes to be transferred
;  	asm("	lda	R13\n"	//pick up a byte of data
	lda	R13
	stxd
;  	asm("	glo R12\n"	//low byte of address doesn't change
	glo R12
	stxd
	ghi R12
	ani 2047>>8
	ori 16384>>8
	stxd
	dec memaddr
;  	asm("	out 4\n"		//select the shield
	out 4
	out 6
	out 6
	out 6
	out 6
	out 4
;  	asm("	dec memaddr\n	dec memaddr\n");//point back to the data byte
	dec memaddr
	dec memaddr
;  	offaddr++;N--;
	incm R12,1
	decm R7,1
;  }
L251:
;  while(N>0){//for all the bytes to be transferred
	jnzU2 R7,L250; NE 0
;  asm("	sex 2\n");		//reset the X register
	sex 2
;}
L246:
	popr R7
	Cretn

which I make to be about 23 instructions in the while loop.

It’s pretty awful and it would be easy for a change in the code or the compiler to break it. It would probably be cleaner to do it as assembly to begin with.

Quick though. I can’t get a reliable timing by my crude methods but I guess it’s doing the 200 byte transfer in 20 ms, about 5X better than the original.

Advertisements

From → web server

Leave a Comment

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: