From: Andrew Morton on
On Mon, 9 Nov 2009 11:22:16 +0100
Joakim Tjernlund <Joakim.Tjernlund(a)transmode.se> wrote:

> This improves zlib: Optimize inffast when copying direct from output
> and gives another 3-4% improvement for my MPC8321 target.
> Does not need CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS,
> uses get_unaligned() but only in one place.
> The copy loop just above this one can also use this
> optimization, but I haven't done so as I have not tested if it
> is a win there too.
>
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund(a)transmode.se>
> ---
>
> Someone with a little endian target should test too.

eh? That requires a PC. Can't you test it??
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: roel kluin on
On Mon, Nov 9, 2009 at 11:22 AM, Joakim Tjernlund
<Joakim.Tjernlund(a)transmode.se> wrote:
> This improves zlib: Optimize inffast when copying direct from output
> and gives another 3-4% improvement for my MPC8321 target.
> Does not need CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS,
> uses get_unaligned() but only in one place.
> The copy loop just above this one can also use this
> optimization, but I haven't done so as I have not tested if it
> is a win there too.
>
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund(a)transmode.se>
> ---


> @@ -240,52 +243,49 @@ void inflate_fast(z_streamp strm, unsigned start)
>                 }
>                 else {
>                     from = out - dist;          /* copy direct from output */
> -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>                     /* minimum length is three */
>                    if (dist > 2 ) {
> -                       unsigned short *sout = (unsigned short *)(out - OFF);
> -                       unsigned short *sfrom = (unsigned short *)(from - OFF);
> -                       unsigned long loops = len >> 1;
> +                       unsigned short *sout;
> +                       unsigned short *sfrom;
> +                       unsigned long loops;
>
> +                       /* Align out addr, only sfrom might be unaligned */
> +                       if (!((long)(out - 1 + OFF)) & 1) {

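I think this is wrong: `!` binds tighter than `&`, so the condition is
evaluated as (!addr) & 1, which is always 0 for a non-NULL pointer.

Did you mean: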

if (!((long)(out - 1 + OFF) & 1))

> +                           PUP(out) = PUP(from);
> +                           len--;
> +                       }
> +                       sout = (unsigned short *)(out - OFF);
> +                       sfrom = (unsigned short *)(from - OFF);
> +                       loops = len >> 1;
>                        do
> -                           PUP(sout) = PUP(sfrom);
> +                           PUP(sout) = UP_UNALIGNED(sfrom);
>                        while (--loops);
>                        out = (unsigned char *)sout + OFF;
>                        from = (unsigned char *)sfrom + OFF;
>                        if (len & 1)
>                            PUP(out) = PUP(from);
> -                   } else if (dist == 2) {
> -                       unsigned short *sout = (unsigned short *)(out - OFF);
> +                   } else { /* dist == 1 or dist == 2 */
> +                       unsigned short *sout;
>                        unsigned short pat16;
> -                       unsigned long loops = len >> 1;
> +                       unsigned long loops;
>
> +                       /* Align out addr */
> +                       if (!((long)(out - 1 + OFF)) & 1) {

and likewise here:

if (!((long)(out - 1 + OFF) & 1))

> +                           PUP(out) = PUP(from);
> +                           len--;
> +                       }
> +                       sout = (unsigned short *)(out - OFF);
>                        pat16 = *(sout-2+2*OFF);
> +                       if (dist == 1)
> +                           pat16 = (pat16 & 0xff) | ((pat16 & 0xff ) << 8);
> +                       loops = len >> 1;
>                        do
>                            PUP(sout) = pat16;
>                        while (--loops);
>                        out = (unsigned char *)sout + OFF;
>                        if (len & 1)
>                            PUP(out) = PUP(from);
> -                   } else {
> -                       unsigned char pat8 = *(out - 1 + OFF);
> -
> -                       do {
> -                           PUP(out) = pat8;
> -                       } while (--len);
>                    }
> -#else
> -                    do {                        /* minimum length is three */
> -                        PUP(out) = PUP(from);
> -                        PUP(out) = PUP(from);
> -                        PUP(out) = PUP(from);
> -                        len -= 3;
> -                    } while (len > 2);
> -                    if (len) {
> -                        PUP(out) = PUP(from);
> -                        if (len > 1)
> -                            PUP(out) = PUP(from);
> -                    }
> -#endif
>                 }
>             }
>             else if ((op & 64) == 0) {          /* 2nd level distance code */
> --
> 1.6.4.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo(a)vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
From: Joakim Tjernlund on
roel kluin <roel.kluin(a)gmail.com> wrote on 12/11/2009 00:46:41:
>
> On Mon, Nov 9, 2009 at 11:22 AM, Joakim Tjernlund
> <Joakim.Tjernlund(a)transmode.se> wrote:
> > This improves zlib: Optimize inffast when copying direct from output
> > and gives another 3-4% improvement for my MPC8321 target.
> > Does not need CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS,
> > uses get_unaligned() but only in one place.
> > The copy loop just above this one can also use this
> > optimization, but I haven't done so as I have not tested if it
> > is a win there too.
> >
> > Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund(a)transmode.se>
> > ---
>
>
> > @@ -240,52 +243,49 @@ void inflate_fast(z_streamp strm, unsigned start)
> >                 }
> >                 else {
> >                     from = out - dist;          /* copy direct from output */
> > -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> >                     /* minimum length is three */
> >                    if (dist > 2 ) {
> > -                       unsigned short *sout = (unsigned short *)(out - OFF);
> > -                       unsigned short *sfrom = (unsigned short *)(from - OFF);
> > -                       unsigned long loops = len >> 1;
> > +                       unsigned short *sout;
> > +                       unsigned short *sfrom;
> > +                       unsigned long loops;
> >
> > +                       /* Align out addr, only sfrom might be unaligned */
> > +                       if (!((long)(out - 1 + OFF)) & 1) {
>
> I think this is wrong
>
> did you mean
>
> if (!((long)(out - 1 + OFF) & 1))

Yes, I will fix this and send out a new patch with
cleanups and fixes for CPUs that cannot do unaligned
accesses. Thanks.

Jocke

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Joakim Tjernlund on
Andrew Morton <akpm(a)linux-foundation.org> wrote on 12/11/2009 00:24:45:
>
> On Mon, 9 Nov 2009 11:22:16 +0100
> Joakim Tjernlund <Joakim.Tjernlund(a)transmode.se> wrote:
>
> > This improves zlib: Optimize inffast when copying direct from output
> > and gives another 3-4% improvement for my MPC8321 target.
> > Does not need CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS,
> > uses get_unaligned() but only in one place.
> > The copy loop just above this one can also use this
> > optimization, but I haven't done so as I have not tested if it
> > is a win there too.
> >
> > Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund(a)transmode.se>
> > ---
> >
> > Someone with a little endian target should test too.
>
> eh? That requires a PC. Can't you test it??

It needs something on that PC that actually uses inflate too, and
I don't have that handy.
Anyhow, I downloaded zip, made the changes, tested a little, and found
a problem. I will resend a new patch with everything in it soon.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/