/*
 * From: nick on 13 May 2010 21:44
 *
 * I wanted to try making a javascript compressor (more specifically,
 * implement a compression engine in javascript, not necessarily for
 * compressing js) ... but then I found this:
 *
 * http://marklomas.net/ch-egg/articles/lzwjs.htm
 *
 * The code is very cleanly written, although it could use some
 * optimizations (stuff should be moved from constructors to prototypes
 * for starters).
 *
 * It outputs an array of numbers (byte array) instead of text... I
 * wanted it to output and read base64 encoded text. After I got that
 * working, I tried to squeeze a few more bytes with this base 90 scheme
 * I came up with. Let me know what you guys think?
 */

// BOF

/**
 * base90 - binary-to-text codec that packs 6 bytes into 8 printable
 * ASCII characters.
 *
 * Alphabet: the 90 characters with codes 35..124 ('#' .. '|'), except
 * that backslash (code 92) is written as '~' so the output never
 * contains a backslash. Digit 0 is therefore '#'.
 *
 * Layout: the input is cut into chunks of 6 bytes. Each chunk is read
 * as a little-endian base-256 number and written as little-endian
 * base-90 digits, zero-padded to a fixed width. A final short chunk of
 * k bytes uses the smallest width that can hold k bytes
 * (1->2, 2->3, 3->4, 4->5, 5->7, 6->8 characters). Because every width
 * is distinct, the decoder recovers the exact byte count from the chunk
 * length alone, so no separators are needed and zero bytes survive.
 *
 * Declared with `var` (rather than assigned to `this`) so it is still a
 * global in a classic script but also loads in strict mode and modules.
 */
var base90 = (function () {

  var RADIX = 90;               // digit values per character
  var OFFSET = 35;              // char code of digit 0 ('#')
  var ZERO_DIGIT = '#';
  var BYTE_RADIX = 256;         // a byte holds 0..255, i.e. 256 values
  var BYTES_PER_CHUNK = 6;      // 256^6 = 2^48 fits exactly in a double
  var CHARS_PER_CHUNK = 8;      // 90^8 > 256^6 > 90^7
  // CHARS_FOR_BYTES[k]: fewest base-90 digits c with 90^c >= 256^k.
  var CHARS_FOR_BYTES = [0, 2, 3, 4, 5, 7, 8];

  var slash = /\\/g, tilde = /~/g, whitespace = /\s+/g;
  // Every character a chunk may contain: '#'..'[' , ']'..'|' and '~'.
  var validChunk = /^[#-\[\]-|~]*$/;

  /**
   * Encode a non-negative number as little-endian base-90 digits.
   *
   * @param {number} n      Non-negative finite integer.
   * @param {number} [width] Minimum length; shorter results are padded
   *                         with the zero digit '#'. Omitted: no padding,
   *                         so 0 encodes to the empty string.
   * @returns {string}
   * @throws {RangeError} If n is negative, NaN or infinite (the digit
   *                      loop would otherwise never terminate).
   */
  function fromNumber(n, width) {
    if (!(n >= 0 && n < Infinity)) {
      throw new RangeError('base90.fromNumber: expected a non-negative finite number, got ' + n);
    }
    var digits = '';
    while (n) {
      digits += String.fromCharCode((n % RADIX) + OFFSET);
      n = Math.floor(n / RADIX);
    }
    // Padding goes at the end because the least significant digit is first.
    while (digits.length < (width || 0)) {
      digits += ZERO_DIGIT;
    }
    return digits.replace(slash, '~');
  }

  /**
   * Decode little-endian base-90 digits to a number.
   * Performs no validation of the characters.
   *
   * @param {string} str
   * @returns {number}
   */
  function toNumber(str) {
    var s = str.replace(tilde, '\\');
    var value = 0;
    // Horner's rule from the most significant (last) digit down.
    for (var i = s.length; i--;) {
      value = value * RADIX + (s.charCodeAt(i) - OFFSET);
    }
    return value;
  }

  /**
   * Encode a byte array as base-90 text.
   *
   * @param {number[]} a  Integers in the range 0..255.
   * @returns {string}    8 characters per 6 bytes, shorter last chunk.
   * @throws {RangeError} If an element is not an integer in 0..255.
   */
  function fromArray(a) {
    var out = '';
    for (var start = 0; start < a.length; start += BYTES_PER_CHUNK) {
      var count = Math.min(BYTES_PER_CHUNK, a.length - start);
      var packed = 0;
      // Byte i of the chunk has weight 256^i; accumulate high byte first.
      for (var i = count; i--;) {
        var b = a[start + i];
        if (typeof b !== 'number' || b < 0 || b > 255 || b % 1 !== 0) {
          throw new RangeError('base90.fromArray: element ' + (start + i) + ' is not a byte: ' + b);
        }
        packed = packed * BYTE_RADIX + b;
      }
      out += fromNumber(packed, CHARS_FOR_BYTES[count]);
    }
    return out;
  }

  /**
   * Decode one chunk of base-90 text and append its bytes to arr.
   * (The name is historical; this is the decoding half of the codec.)
   *
   * @param {string} str    One chunk: 0, 2, 3, 4, 5, 7 or 8 digits;
   *                        surrounding whitespace is ignored.
   * @param {number[]} arr  Array that receives the decoded bytes.
   * @throws {Error} If the chunk has an impossible length, contains a
   *                 character outside the alphabet, or encodes a value
   *                 too large for its byte count.
   */
  function encodeChunk(str, arr) {
    var chunk = str.trim();
    // The chunk length identifies how many bytes were packed into it.
    var count = CHARS_FOR_BYTES.indexOf(chunk.length);
    if (count < 0 || !validChunk.test(chunk)) {
      throw new Error('base90: malformed chunk "' + chunk + '"');
    }
    var value = toNumber(chunk);
    if (value >= Math.pow(BYTE_RADIX, count)) {
      throw new Error('base90: chunk value out of range "' + chunk + '"');
    }
    // Always emit exactly `count` bytes, including zero bytes.
    while (count--) {
      arr.push(value % BYTE_RADIX);
      value = Math.floor(value / BYTE_RADIX);
    }
  }

  /**
   * Decode base-90 text to a byte array.
   *
   * @param {string} str  Text produced by fromArray/fromString. Whitespace
   *                      is not part of the alphabet and is skipped, so the
   *                      text may be line-wrapped.
   * @returns {number[]}
   * @throws {Error} On malformed input (see encodeChunk).
   */
  function toArray(str) {
    var text = str.replace(whitespace, '');
    var bytes = [];
    for (var pos = 0; pos < text.length; pos += CHARS_PER_CHUNK) {
      encodeChunk(text.slice(pos, pos + CHARS_PER_CHUNK), bytes);
    }
    return bytes;
  }

  /**
   * Encode a string whose characters all have codes 0..255
   * (one byte per character).
   *
   * @param {string} str
   * @returns {string}
   * @throws {RangeError} If a character code is above 255.
   */
  function fromString(str) {
    var bytes = [];
    for (var i = 0; i < str.length; i++) {
      bytes.push(str.charCodeAt(i));
    }
    return fromArray(bytes);
  }

  /**
   * Decode base-90 text to a string, one character per byte.
   *
   * This method shadows Object.prototype.toString, so when it is called
   * without an argument (implicit string conversion of the codec object)
   * it returns a descriptive tag instead of failing.
   *
   * @param {string} str
   * @returns {string}
   */
  function toString(str) {
    if (str === undefined) {
      return '[object base90]';
    }
    var bytes = toArray(str);
    var out = '';
    for (var i = 0; i < bytes.length; i++) {
      out += String.fromCharCode(bytes[i]);
    }
    return out;
  }

  return {
    fromNumber: fromNumber,
    toNumber: toNumber,
    fromArray: fromArray,
    toArray: toArray,
    fromString: fromString,
    toString: toString,
    encodeChunk: encodeChunk
  };
}());

// Test

var src = "The quick brown fox jumps over the lazy red dog.";
console.log(src);

var dst = base90.fromString(src);
console.log(dst);

src = base90.toString(dst);
console.log(src);

// EOF

/*
 * The intermediate array storage is there so it works with the
 * compression script linked above. I'm going to merge everything
 * together and optimize some stuff, then try to get the decompressor as
 * small as possible...
 *
 * So what do you guys think? Is this any better than base 64? It sure as
 * hell is ugly :)
 */
From: BGB / cr88192 on 14 May 2010 00:01 usual answer is base85. base85 can encode 32 bits in 5 bytes. (note: base85 is a bit better than base64 in terms of what it does...). base 90 will not save much of anything (vs base 85) unless one transforms large blocks at a time. in fact, one would need to encode 8 additional bytes to save 1 byte (so, a unit of 13 bytes). this would encode around 84.4 bits, 80 bits being a byte-multiple (80 bits in 13 bytes). base85 could encode 96 bits in 15 bytes. so, base90 has a mean-effectiveness of 6.154, base85 6.4. in terms of the smallest basic unit, base85 is better. the case is different at 168 bits in 26 bytes, which has an effectiveness of 6.462. in either case, 168 bits is an awkward unit... or such... "nick" <nick___(a)fastmail.fm> wrote in message news:75b38d32-b2f9-4445-97cb-3e63e2da3f1f(a)r34g2000yqj.googlegroups.com... >I wanted to try making a javascript compressor (more specifically, > implement a compression engine in javascript, not necessarily for > compressing js) ... but then I found this: > > http://marklomas.net/ch-egg/articles/lzwjs.htm > > The code is very cleanly written, although it could use some > optimizations (stuff should be moved from constructors to prototypes > for starters). > > It outputs an array of numbers (byte array) instead of text... I > wanted it to output and read base64 encoded text. After I got that > working, I tried to squeeze a few more bytes with this base 90 scheme > I came up with. Let me know what you guys think? 
> > // BOF > > this.base90 = new function () { > > var slash = /\\/g, tilde = /~/g; > > // encode a number as a base 90 string > this.fromNumber = function (n) { > for (var r=''; n;) { > r += String.fromCharCode((n%90) + 35); > n = Math.floor(n/90); > } > return r.replace(slash,'~'); > } > > // decode a base 90 string to a number > this.toNumber = function (str) { > var s=str.replace(tilde,'\\'); > for (var i=s.length, r=0; i--;) { > r += Math.pow(90, i) * (s.charCodeAt(i) - 35); > } > return r; > } > > // encode a byte array as a base 90 string > this.fromArray = function (a) { > var i, r='', q=0, e, l=a.length; > for (i=0; i<l; i++) { > q += a[i] * Math.pow(255, i%6); > if ((i+1)%6) continue; > e = this.fromNumber(q); > r += e + (e.length < 8 ? ' ' : ''); > q = 0; > } > if (q) r += this.fromNumber(q); > return r.trim(); > } > > // decode a base 90 string to a byte array > this.toArray = function (str) { > var i, r=[], l=str.length, c, n, p, q=''; > for (i=0; i<l; i++) { > q += (c = str.charAt(i)); > if (c != ' ' && q.length < 8) continue; > this.encodeChunk(q, r); > q = ''; > } > if (q) this.encodeChunk(q, r); > return r; > } > > this.fromString = function (str) { > var i, a=[], l=str.length, r=''; > for (i=0; i<l; i++) a.push(str.charCodeAt(i)); > return this.fromArray(a); > } > > this.toString = function (str) { > var i, a = this.toArray(str), l=a.length, r=''; > for (i=0; i<l; i++) r += String.fromCharCode(a[i]); > return r; > } > > this.encodeChunk = function (str, arr) { > for (var n=this.toNumber(str.trim()), p=6; p--;) { > arr.push(n%255); > if (!n) break; > n = Math.floor(n/255); > } > } > } > > // Test > > var src = "The quick brown fox jumps over the lazy red dog."; > console.log(src); > > var dst = base90.fromString(src); > console.log(dst); > > src = base90.toString(dst); > console.log(src); > > // EOF > > The intermediate array storage is there so it works with the > compression script linked above. 
I'm going to merge everything > together and optimize some stuff, then try to get the decompressor as > small as possible... > > So what do you guys think? Is this any better than base 64? It sure as > hell is ugly :)
From: Paul E. Schoen on 15 May 2010 19:28 "nick" <nick___(a)fastmail.fm> wrote in message news:75b38d32-b2f9-4445-97cb-3e63e2da3f1f(a)r34g2000yqj.googlegroups.com... >I wanted to try making a javascript compressor (more specifically, > implement a compression engine in javascript, not necessarily for > compressing js) ... but then I found this: > > http://marklomas.net/ch-egg/articles/lzwjs.htm > > So what do you guys think? Is this any better than base 64? It sure as > hell is ugly :) I had always thought Base64 was used just to be able to send binary data in text form, without control characters. Compression was a separate process. There seem to be 94 ASCII characters (excluding Space) that could be used for encoding. If all 8 bits of an unsigned character could be used, a base 128 would be possible, and it should be twice as efficient as base64. http://code.google.com/apis/protocolbuffers/docs/encoding.html I know that anyone can do a search and use the Wiki, but this seems to have a good explanation: http://en.wikipedia.org/wiki/Base64 I made a Base64 converter some time ago, perhaps using Delphi or C, but I can't find it. Paul
From: nick on 16 May 2010 19:38 On May 15, 7:28 pm, "Paul E. Schoen" <p...(a)pstech-inc.com> wrote: > "nick" <nick...(a)fastmail.fm> wrote in message > > news:75b38d32-b2f9-4445-97cb-3e63e2da3f1f(a)r34g2000yqj.googlegroups.com... > > >I wanted to try making a javascript compressor (more specifically, > > implement a compression engine in javascript, not necessarily for > > compressing js) ... but then I found this: > > >http://marklomas.net/ch-egg/articles/lzwjs.htm > > > So what do you guys think? Is this any better than base 64? It sure as > > hell is ugly :) > > I had always thought Base64 was used just to be able to send binary data in > text form, without control characters. Compression was a separate process. I think you may have misread the OP. I was a bit OT at the beginning (trying to set context). > There seem to be 94 ASCII characters (excluding Space) that could be used > for encoding. If all 8 bits of an unsigned character could be used, a base > 128 would be possible, and it should be twice as efficient as base64. http://code.google.com/apis/protocolbuffers/docs/encoding.html But characters like slash and quote can be troublesome. The way I implemented it I could squeeze in one more character I think, but base91 just sounded too silly.
From: Dr J R Stockton on 17 May 2010 14:09 In comp.lang.javascript message <M6GHn.11586$Gx2.2053(a)newsfe20.iad>, Sat, 15 May 2010 19:28:30, Paul E. Schoen <paul(a)pstech-inc.com> posted: > >I had always thought Base64 was used just to be able to send binary >data in text form, without control characters. Yes. > Compression was a separate process. There seem to be 94 ASCII >characters (excluding Space) that could be used for encoding. If all 8 >bits of an unsigned character could be used, a base 128 would be >possible, and it should be twice as efficient as base64. No; a 7-bit character only contains about 15% more information than a 6-bit one. -- (c) John Stockton, near London. *@merlyn.demon.co.uk/?.?.Stockton(a)physics.org Web <URL:http://www.merlyn.demon.co.uk/> - FAQish topics, acronyms, & links. Correct <= 4-line sig. separator as above, a line precisely "-- " (RFC5536/7) Do not Mail News to me. Before a reply, quote with ">" or "> " (RFC5536/7)
|
Next
|
Last
Pages: 1 2 Prev: Between hard code writing hours Next: How to Pump $1,000s in CASH & Checks to your door. |