Prev: Apple II Debugger
Next: TMA Assembler?
From: Skybuck on 20 Aug 2006 10:30 Hi, *** Begin of Part 2 of Code *** // this one below looks like a promosing contender, here is what the dude had // to say about it :) { 23 AOPS, quite a big number but we are avoiding memory access completely. At 25% extra cost the width is extended to 64 bits. :) Problem: a lot of dependencies to previous statement, but atleast the left and right shift sides are possible to run concurrently (thinking of decoded instructions). jukka(a)liimatta.org He also used an inline directive... interesting to test with inline directive as well... but for now we will test without inlining ;) just to preserve the assembler code etc. by the way, we can't inline anyway I guess because the benchmarking and verifieing routines except a pointer to a function or something like that ;) so no inline for us :P* =D ;) at least not in this program ! :D } // original C code: (* inline uint32 reverse(uint32 v) { // reverse bit order // original implementation by Steve Baker v = ((v >> 1) & 0x55555555) | ((v << 1) & 0xaaaaaaaa); v = ((v >> 2) & 0x33333333) | ((v << 2) & 0xcccccccc); v = ((v >> 4) & 0x0f0f0f0f) | ((v << 4) & 0xf0f0f0f0); v = ((v >> 8) & 0x00ff00ff) | ((v << 8) & 0xff00ff00); v = (v >> 16) | (v << 16); return v; } *) // Translated to Delphi by Skybuck: function JukkaAndSteveSwapBits( const ParaByte : byte ) : byte; var v : longword; // local variable used to match prototype. begin v := ParaByte; // reverse bit order // original implementation by Steve Baker v := ((v shr 1) and $55555555) or ((v shl 1) and $aaaaaaaa); v := ((v shr 2) and $33333333) or ((v shl 2) and $cccccccc); v := ((v shr 4) and $0f0f0f0f) or ((v shl 4) and $f0f0f0f0); v := ((v shr 8) and $00ff00ff) or ((v shl 8) and $ff00ff00); v := (v shr 16) or (v shl 16); result := v shr 24; // modification by skybuck to fix verification problem. end; { Let's take there piece of code and try to change it so it's limited to bytes only. } // amazing it works :) // let's clean up the first version and produce a second version { // version 0.01 function JukkaAndSteveAndSkybuckSwapBits( const ParaByte : byte ) : byte; var v : longword; // local variable used to match prototype. begin v := ParaByte; // reverse bit order // original implementation by Steve Baker v := ((v shr 1) and $55) or ((v shl 1) and $aa); v := ((v shr 2) and $33) or ((v shl 2) and $cc); v := ((v shr 4) and $0f) or ((v shl 4) and $f0); // v := ((v shr 8) and $ff) or ((v shl 8) and $ff); // v := (v shr 16) or (v shl 16); result := v; end; } // version 0.02 a nice cooperation between Jukka, Steve and Skybuck ;) { function JukkaAndSteveAndSkybuckSwapBits( const ParaByte : byte ) : byte; begin result := ParaByte; // optimized away by compiler // reverse bit order // original implementation by Steve Baker // modified by Jukka (?) and Skybuck (!) // the code below will be optimized by the compiler to other kind of instructions ! ;) result := ((result shr 1) and $55) or ((result shl 1) and $aa); result := ((result shr 2) and $33) or ((result shl 2) and $cc); result := ((result shr 4) and $0f) or ((result shl 4) and $f0); end; } // version 0.03 I don't like hexadecimal values... most of the time it's a form of obfuscation, let's replace it with decimals. // Decimals function JukkaAndSteveAndSkybuckSwapBits( const ParaByte : byte ) : byte; begin result := ParaByte; // optimized away by compiler // reverse bit order // original implementation by Steve Baker // modified by Jukka (?) and Skybuck (!) // the code below will be optimized by the compiler to other kind of instructions ! ;) result := ((result shr 1) and 85) or ((result shl 1) and 170); result := ((result shr 2) and 51) or ((result shl 2) and 204); result := ((result shr 4) and 15) or ((result shl 4) and 240); end; // let's also decode the decimals to understand their bit patterns. // or let's just print it out :) // 85 01010101 // 170 10101010 // 51 00110011 // 204 11001100 // 15 00001111 // 240 11110000 // it's clear to me... that the above code is not really correct. // it's simply bugged or a form of obfuscation. I will therefore disqualify it ! // There will be no forms of obfuscation in my competitions. // I shall try to fix the method and algorithm so that it's no longer obfuscated ! // non-obfuscated. // *** verification failure as expected *** { function JukkaAndSteveAndSkybuckSwapBits( const ParaByte : byte ) : byte; begin // reverse bit order // original implementation by Steve Baker // modified by Jukka (?) and Skybuck (!) // the code below will be optimized by the compiler to other kind of instructions ! ;) result := ((ParaByte shr 1) and 85) or ((ParaByte shl 1) and 170); result := ((ParaByte shr 2) and 51) or ((ParaByte shl 2) and 204); result := ((ParaByte shr 4) and 15) or ((ParaByte shl 4) and 240); end; } // *** verification failure as expected *** { // de-obfuscation failed. // broken // method is definetly disqualified ;) function JukkaAndSteveAndSkybuckSwapBits( const ParaByte : byte ) : byte; begin result := ((ParaByte shr 1) and 85); result := result or ((ParaByte shl 1) and 170); result := result or ((ParaByte shr 2) and 51); result := result or ((result shl 2) and 204); result := result or ((ParaByte shr 4) and 15); result := result or ((result shl 4) and 240); end; } // let's try one more implementation the xlat thingy ;) // I had to download a special intel instruction manual... // it turns out I had the wrong manual... all this time hehe... I saved to wrong filename woooopsie ;) and ended up with two identical manuals // but it should have been two seperate manuals... a and b. Now I had a and a... saved as a and b... oopsie. // No wonder I was missing half of the instructions hehe ;) Oh well no problem anyway, since me rarely code in asm ;). // Here goes. Me wonders if BASM has xlat support hehe... otherwise we use upcodes ;) :P* var vSwapBitsLookUpTable : packed array[0..255] of byte; // first version that's working :) // version 0.01 { function SkybuckSwapBitsUsingXLAT( const ParaByte : byte ) : byte; var vPointer : pointer; begin // result := vSwapBitsLookUpTable[ ParaByte ]; vPointer := @vSwapBitsLookUpTable[0]; asm push ds push EBX mov EBX, vPoint
From: Skybuck on 20 Aug 2006 10:37 jukka(a)liimatta.org schreef: > inline uint32 reverse(uint32 v) > { > // reverse bit order > // original implementation by Steve Baker > > v = ((v >> 1) & 0x55555555) | ((v << 1) & 0xaaaaaaaa); > v = ((v >> 2) & 0x33333333) | ((v << 2) & 0xcccccccc); > v = ((v >> 4) & 0x0f0f0f0f) | ((v << 4) & 0xf0f0f0f0); > v = ((v >> 8) & 0x00ff00ff) | ((v << 8) & 0xff00ff00); > v = (v >> 16) | (v << 16); > > return v; > } > > 23 AOPS, quite a big number but we are avoiding memory access > completely. At 25% extra cost the width is extended to 64 bits. :) > > Problem: a lot of dependencies to previous statement, but atleast the > left and right shift sides are possible to run concurrently (thinking > of decoded instructions). Nice try dude, It's fast, it's working (well I had to add a shr 24 to limit it to byte) but I think it's obfuscated. I also made a byte-limited version. I come to my conclusion of obfuscation based on the byte-limited version which I derived from it. This one has been examined in the cpu window and the generated assembler instructions are much different than the C/Delphi code. Also the code does not seem to make much sense, at first glance anyway, and even after further inspection it's unclear what it does, how it works, or how it can be extended to more than 8 bits. Obfuscating is not allowed in my competitions, so this algorithm/code is disqualified ! ;) =D However it's still included in the test program, to verify and benchmark it ;) Thank you for participating, you are free to try again and this time in clearity, and then you can come again =D lol, have a nice day ! =D Bye, Skybuck.
From: Skybuck on 20 Aug 2006 10:38 J French schreef: > On 20 Aug 2006 03:23:38 -0700, "Skybuck" <skybuck2000(a)hotmail.com> > wrote: > <snip> > > >Intel starts reading from right to left. It starts reading at the > >lowest byte/bit order/number and this corresponds to the least > >significant bit and byte. > > > >Memory layout is: > > > >bit 31.... bit 0 > >byte 3..... byte 0 > ><--------------------- > >longword 10 > >/|\ > > | > > | > >longword 0 > > > >Register layout is: > > > >bit 31.... bit 0 > >byte 3.... byte 0 > ><--------------------- > > >So intel reads from bottom to top so to speak and from right to left. > > >Crazy lol. > > >It's the world upside down ! ;) > > No it makes a lot of sense > > If you have an unsigned 4 byte number, then the same number as an > unsigned 2 byte number (or 1 byte number) is at the same location > > It makes it a heck of a lot easier reading in a number low part first True it makes sense. Bye, Skybuck.
From: J French on 20 Aug 2006 11:22 On 20 Aug 2006 07:27:04 -0700, "Skybuck" <skybuck2000(a)hotmail.com> wrote: >Day two of the competition. Sunshine, Mostly here you have hard bitten old coders You are like a puppy in a pack of old scarred wolves. We of us who help you, do so because you have a bit of youthful talent hiding under the enthusiasm. Why not calm down
From: Terry Russell on 20 Aug 2006 11:47
"J French" <erewhon(a)nowhere.uk> wrote in message news:44e87900.285852870(a)news.btopenworld.com... > On 20 Aug 2006 07:27:04 -0700, "Skybuck" <skybuck2000(a)hotmail.com> > wrote: > >>Day two of the competition. > > Sunshine, > > Mostly here you have hard bitten old coders > > You are like a puppy in a pack of old scarred wolves. > > We of us who help you, do so because you have a bit of youthful talent > hiding under the enthusiasm. > > Why not calm down or at least stay in the delphi rubber room where we can safely block sender it seems like the usual extreme effort when a 5 clock lookup , plus a 5 clock loop cycle would do ( not counting cache fill , jump offsets and a few clocks less for larger data chunks) 256 byte lookup table fill algorithm...30-400 clocks per byte load from exe data 40-400 clocks per byte breakeven around 16,000 lookups, after which the particular fill algorithm becomes rapidly irrelevant |