Logo Search packages:      
Sourcecode: ha version File versions  Download package

hsc.c

/***********************************************************************
  This file is part of HA, a general purpose file archiver.
  Copyright (C) 1995 Harri Hirvola

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
************************************************************************
      HA HSC method
***********************************************************************/

#include <malloc.h>
#include <stdlib.h>
#include <stdio.h>
#include "ha.h"
#include "haio.h"
#include "acoder.h"
#include "hsc.h"
#include "error.h"

#define IECLIM          32           /* initial escape counter upper limit */
#define NECLIM          5            /* no escape expected counter limit */
#define NECTLIM         4            /* */
#define NECMAX          10           /* no escape expected counter maximum */
#define MAXCLEN         4            /* assumed to be 4 in several places */
#define NUMCON          10000        /* number of contexts to remember */ 
#define NUMCFB          32760        /* number of frequencies to remember */
#define ESCTH           3            /* threshold for escape calculation */
#define MAXTVAL         8000         /* maximum frequency value */
#define RFMINI          4            /* initial refresh counter value */
#define HTLEN             16384            /* length of hash table */
#define NIL       0xffff             /* NIL pointer in lists */
#define ESC       256          /* escape symbol */

typedef unsigned char Context[4];

/* model data */
static Context curcon;              /* current context */
static U16B *ht=NULL;               /* hash table */
static U16B *hp=NULL;               /* hash list pointer array */
static Context *con=NULL;           /* context array */
static unsigned char *cl=NULL;            /* context length array */
static unsigned char *cc=NULL;            /* character counts */
static U16B *ft=NULL;               /* total frequency of context */
static unsigned char *fe=NULL;            /* frequencys under ESCTH in context */
static U16B *elp=NULL;              /* expire list previous pointer array */
static U16B *eln=NULL;              /* expire list next pointer array */
static U16B elf,ell;                /* first and last of expire list */
static unsigned char *rfm=NULL;           /* refresh counter array */
static U16B *fa=NULL;               /* frequency array */
static unsigned char *fc=NULL;            /* character for frequency array */
static U16B *nb=NULL;               /* next pointer for frequency array */
static U16B fcfbl;                  /* pointer to free frequency blocks */
static U16B nrel;             /* context for frequency block release */

/* frequency mask system */
static unsigned char cmask[256];      /* masked characters */
static unsigned char cmstack[256];    /* stack of cmask[] entries to clear */ 
static S16B cmsp;             /* pointer to cmstack */

/* escape propability modifying system variables */
static unsigned char nec;           /* counter for no escape expected */
static unsigned char iec[MAXCLEN+1];  /* initial escape counters */

/* update stack variables */
static U16B usp;              /* stack pointer */
static U16B cps[MAXCLEN+1];         /* context pointers */
static U16B as[MAXCLEN+1];          /* indexes to frequency array */

/* miscalneous */
static S16B dropcnt;                /* counter for context len drop */
static unsigned char maxclen;       /* current maximum length for context */ 
static U16B hrt[HTLEN];             /* semi random data for hashing */
static U16B hs[MAXCLEN+1];          /* hash stack for context search */ 
static S16B cslen;                  /* length of context to search */
 
/***********************************************************************
      Cleanup routine
***********************************************************************/

void hsc_cleanup(void) {
    
    if (ht!=NULL) free(ht),ht=NULL;
    if (fc!=NULL) free(fc),fc=NULL;
    if (fa!=NULL) free(fa),fa=NULL;
    if (ft!=NULL) free(ft),ft=NULL;
    if (fe!=NULL) free(fe),fe=NULL;
    if (nb!=NULL) free(nb),nb=NULL;
    if (hp!=NULL) free(hp),hp=NULL;
    if (elp!=NULL) free(elp),elp=NULL;
    if (eln!=NULL) free(eln),eln=NULL;
    if (cl!=NULL) free(cl),cl=NULL;
    if (cc!=NULL) free(cc),cc=NULL;
    if (rfm!=NULL) free(rfm),rfm=NULL;
    if (con!=NULL) free(con),con=NULL;
}


/***********************************************************************
      System initialization
***********************************************************************/

static  U16B make_context(unsigned char cl, S16B c);

static void init_model(void) {

    register S16B i;
    S32B z,l,h,t;
    
    ht=malloc(HTLEN*sizeof(*ht));         
    hp=malloc(NUMCON*sizeof(*hp));
    elp=malloc(NUMCON*sizeof(*elp));
    eln=malloc(NUMCON*sizeof(*eln));
    cl=malloc(NUMCON*sizeof(*cl));
    cc=malloc(NUMCON*sizeof(*cc));
    ft=malloc(NUMCON*sizeof(*ft));
    fe=malloc(NUMCON*sizeof(*fe));
    rfm=malloc(NUMCON*sizeof(*rfm));
    con=malloc(NUMCON*sizeof(*con));
    fc=malloc(NUMCFB*sizeof(*fc));
    fa=malloc(NUMCFB*sizeof(*fa));
    nb=malloc(NUMCFB*sizeof(*nb));
    if (hp==NULL || elp==NULL || eln==NULL ||
      cl==NULL || rfm==NULL || con==NULL ||
      cc==NULL || ft==NULL || fe==NULL ||
      fc==NULL || fa==NULL || nb==NULL || ht==NULL) {
      hsc_cleanup();
      error(1,ERR_MEM,"init_model()");
    }
    maxclen=MAXCLEN;          
    iec[0]=(IECLIM>>1);
    for (i=1;i<=MAXCLEN;++i) iec[i]=(IECLIM>>1)-1;
    dropcnt=NUMCON/4;
    nec=0;
    nrel=0;
    hs[0]=0;
    for (i=0;i<HTLEN;++i) ht[i]=NIL;      
    for (i=0;i<NUMCON;++i) {
      eln[i]=i+1;
      elp[i]=i-1;
      cl[i]=0xff;
      nb[i]=NIL;
    }
    elf=0;
    ell=NUMCON-1;
    for (i=NUMCON;i<NUMCFB-1;++i) nb[i]=i+1;
    nb[i]=NIL;
    fcfbl=NUMCON;
    curcon[3]=curcon[2]=curcon[1]=curcon[0]=0;
    cmsp=0;
    for (i=0;i<256;++i) cmask[i]=0;       
    for (z=10,i=0;i<HTLEN;++i) {
      h=z/(2147483647L/16807L);
      l=z%(2147483647L/16807L);           
      if ((t=16807L*l-(2147483647L%16807L)*h)>0) z=t;
      else z=t+2147483647L;
      hrt[i]=(U16B)z&(HTLEN-1);
    }
}

static void init_pack(void) {

    init_model();
    ac_init_encode();
}

static void init_unpack(void) {

    init_model();
    ac_init_decode();
}


/***********************************************************************
      Finite context model
***********************************************************************/

#define HASH(s,l,h)     {                                 \
                    h=0;                                    \
                    if (l) h=hrt[s[0]];                     \
                    if (l>1) h=hrt[(s[1]+h)&(HTLEN-1)];     \
                    if (l>2) h=hrt[(s[2]+h)&(HTLEN-1)];     \
                    if (l>3) h=hrt[(s[3]+h)&(HTLEN-1)];     \
                  }                                                     

#define move_context(c) curcon[3]=curcon[2],curcon[2]=curcon[1], \
                  curcon[1]=curcon[0],curcon[0]=c

static  void release_cfblocks(void) {
      
    register U16B i,j,d;
    
    do {
      do if (++nrel==NUMCON) nrel=0; while (nb[nrel]==NIL);
      for (i=0;i<=usp;++i) if ((cps[i]&0x7fff)==nrel) break;
    } while (i<=usp);   
    for (i=nb[nrel],d=fa[nrel];i!=NIL;i=nb[i]) if (fa[i]<d) d=fa[i];
    ++d;
    if (fa[nrel]<d) {
      for (i=nb[nrel];fa[i]<d && nb[i]!=NIL;i=nb[i]);
      fa[nrel]=fa[i];
      fc[nrel]=fc[i];
      j=nb[i];
      nb[i]=fcfbl;
      fcfbl=nb[nrel];
      if ((nb[nrel]=j)==NIL) {
          cc[nrel]=0;
          fe[nrel]=(ft[nrel]=fa[nrel])<ESCTH?1:0;
          return;
      }
    }
    fe[nrel]=(ft[nrel]=fa[nrel]/=d)<ESCTH?1:0;
    cc[nrel]=0;
    for (j=nrel,i=nb[j];i!=NIL;) {
      if (fa[i]<d) {
          nb[j]=nb[i];
          nb[i]=fcfbl;
          fcfbl=i;
          i=nb[j];
      }
      else {
          ++cc[nrel];
          ft[nrel]+=fa[i]/=d;
          if (fa[i]<ESCTH) fe[nrel]++;
          j=i;
          i=nb[i];
      }
    }
}

static  U16B make_context(unsigned char conlen, S16B c) {

    register S16B i;
    register U16B nc,fp;
    
    nc=ell;
    ell=elp[nc];
    elp[elf]=nc;
    eln[nc]=elf;
    elf=nc;
    if (cl[nc]!=0xff) {
      if (cl[nc]==MAXCLEN && --dropcnt==0) maxclen=MAXCLEN-1;
      HASH(con[nc],cl[nc],i);
      if (ht[i]==nc) ht[i]=hp[nc];
      else {
          for (i=ht[i];hp[i]!=nc;i=hp[i]);
          hp[i]=hp[nc];
      }
      if (nb[nc]!=NIL) {
          for (fp=nb[nc];nb[fp]!=NIL;fp=nb[fp]);
          nb[fp]=fcfbl;
          fcfbl=nb[nc];
      }
    }
    nb[nc]=NIL;
    fe[nc]=ft[nc]=fa[nc]=1;
    fc[nc]=c;
    rfm[nc]=RFMINI;
    cc[nc]=0;
    cl[nc]=conlen;
    con[nc][0]=curcon[0];
    con[nc][1]=curcon[1];
    con[nc][2]=curcon[2];
    con[nc][3]=curcon[3];
    HASH(curcon,conlen,i);
    hp[nc]=ht[i];
    ht[i]=nc;
    return nc;
}

static  void el_movefront(U16B cp) {

    if (cp==elf) return;
    if (cp==ell) ell=elp[cp];
    else {
      elp[eln[cp]]=elp[cp];
      eln[elp[cp]]=eln[cp];
    } 
    elp[elf]=cp;
    eln[cp]=elf;
    elf=cp;
}

static void  add_model(S16B c) {

    register U16B i;
    register S16B cp;
    
    while (usp!=0) {
      i=as[--usp];
      cp=cps[usp];
      if (cp&0x8000) {
          cp&=0x7fff;
          if (fcfbl==NIL) release_cfblocks();
          nb[i]=fcfbl;
          i=nb[i];
          fcfbl=nb[fcfbl];
          nb[i]=NIL;
          fa[i]=1;                  
          fc[i]=c;
          ++cc[cp];
          ++fe[cp];
      }
      else if (++fa[i]==ESCTH) --fe[cp];
      if ((fa[i]<<1)<++ft[cp]/(cc[cp]+1)) --rfm[cp];
      else if (rfm[cp]<RFMINI) ++rfm[cp];
      if (!rfm[cp] || ft[cp]>=MAXTVAL) {
          ++rfm[cp];
          fe[cp]=ft[cp]=0;
          for (i=cp;i!=NIL;i=nb[i]) {
            if (fa[i]>1) {
                ft[cp]+=fa[i]>>=1;
                if (fa[i]<ESCTH) ++fe[cp];
            }
            else {
                ++ft[cp];
                ++fe[cp];
            }
          }
      }
    }
}

static  U16B find_next(void) {

    register S16B i,k;
    register U16B cp;
    
    for (i=cslen-1;i>=0;--i) {
      k=hs[i];
      for (cp=ht[k];cp!=NIL;cp=hp[cp]) {
          if (cl[cp]==i) {
            switch (i) {
              case 4:
                if (curcon[3]!=con[cp][3]) break;
              case 3:
                if (curcon[2]!=con[cp][2]) break;
              case 2:
                if (curcon[1]!=con[cp][1]) break;
              case 1:
                if (curcon[0]!=con[cp][0]) break;
              case 0:
                cslen=i;
                return cp;
            }
          } 
      }
    }
    return NIL;
}

static  U16B find_longest(void) {

    hs[1]=hrt[curcon[0]];     
    hs[2]=hrt[(curcon[1]+hs[1])&(HTLEN-1)]; 
    hs[3]=hrt[(curcon[2]+hs[2])&(HTLEN-1)]; 
    hs[4]=hrt[(curcon[3]+hs[3])&(HTLEN-1)]; 
    usp=0;
    while(cmsp) cmask[cmstack[--cmsp]]=0;
    cslen=MAXCLEN+1;
    return find_next();
}

static U16B adj_escape_prob(U16B esc, U16B cp) {

    if (ft[cp]==1) return iec[cl[cp]]>=(IECLIM>>1)?2:1;
    if (cc[cp]==255) return 1;
    if (cc[cp] && ((cc[cp]+1)<<1)>=ft[cp]) {
      esc=(S16B)((S32B)esc*((cc[cp]+1)<<1)/ft[cp]);
      if (cc[cp]+1==ft[cp]) esc+=(cc[cp]+1)>>1;
    }
    return esc?esc:1;
}


static  S16B code_first(U16B cp, S16B c) {

    register U16B i;
    register S16B sum,cf,tot,esc;
    
    sum=cf=0;
    for (i=cp;i!=NIL;i=nb[i]) {
      if (fc[i]==c) {
          cf=fa[i];
          as[0]=i;
          break;
      }
      sum+=fa[i];
    } 
    tot=ft[cp];
    esc=adj_escape_prob(fe[cp],cp);
    if (nec>=NECLIM) {
      if (tot<=NECTLIM && nec==NECMAX) {
          tot<<=2;
          sum<<=2;
          cf<<=2;
      }
      else {
          tot<<=1;
          sum<<=1;
          cf<<=1;
      }
    }
    usp=1;
    if (cf==0) {
      ac_out(tot,tot+esc,tot+esc);
      for (i=cp;i!=NIL;sum=i,i=nb[i]) {
          cmstack[cmsp++]=fc[i];
          cmask[fc[i]]=1;
      }
      as[0]=sum;  /* sum holds last i ! */
      nec=0;
      if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
      cps[0]=0x8000|cp; 
      return 0;
    }
    ac_out(sum,sum+cf,tot+esc);
    if (nec<NECMAX) ++nec;
    if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
    cps[0]=cp;
    return 1;
}


static  S16B code_rest(U16B cp, S16B c) {

    register U16B i;
    register S16B sum,cf,tot,esc;
    
    tot=sum=cf=esc=0;
    for (i=cp;i!=NIL;i=nb[i]) {
      if (!cmask[fc[i]]) {
          if (fa[i]<ESCTH) ++esc;
          if (cf==0 && fc[i]==c) {
            sum=tot;
            cf=fa[i];
            as[usp]=i;
          }
          tot+=fa[i];
      }
    } 
    esc=adj_escape_prob(esc,cp);
    if (cf==0) {
      ac_out(tot,tot+esc,tot+esc);
      for (i=cp;i!=NIL;sum=i,i=nb[i]) {
          if (!cmask[fc[i]]) {
            cmstack[cmsp++]=fc[i];
            cmask[fc[i]]=1;
          }
      }
      as[usp]=sum;  /* sum holds last i ! */
      if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
      cps[usp++]=0x8000|cp; 
      return 0;
    }
    ac_out(sum,sum+cf,tot+esc);
    ++nec;   /* must add test used in code_first() if NECMAX<5 ! */
    if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
    cps[usp++]=cp;
    return 1;
}

static  void code_new(S16B c) {
    
    register S16B i;
    register U16B sum,tot;
    
    sum=0;
    tot=257-cmsp;
    for (i=0;i<c;++i) sum+=1-cmask[i];
    ac_out(sum,sum+1,tot);
}

static  S16B decode_first(U16B cp) {
    
    register U16B c;
    register U16B tv;
    register U16B i;
    register S16B sum,tot,esc,cf;
    register unsigned char sv;

    esc=adj_escape_prob(fe[cp],cp);
    tot=ft[cp];
    if (nec>=NECLIM) {
      if (tot<=NECTLIM && nec==NECMAX) sv=2;
      else sv=1;
      tot<<=sv;
      tv=ac_threshold_val(tot+esc)>>sv;
      for (c=cp,sum=0;;c=nb[c]) {
          if (c==NIL) break;
          if (sum+fa[c]<=tv) sum+=fa[c];
          else {
            cf=fa[c]<<sv;
            break;
          }
      }
      sum<<=sv;
    }
    else {
      tv=ac_threshold_val(tot+esc);
      for (c=cp,sum=0;;c=nb[c]) {
          if (c==NIL) break;
          if (sum+fa[c]<=tv) sum+=fa[c];
          else {
            cf=fa[c];
            break;
          }
      }
    }
    usp=1;
    if (c!=NIL) {
      ac_in(sum,sum+cf,tot+esc);
      if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
      as[0]=c;
      cps[0]=cp;
      c=fc[c];
      if (nec<NECMAX) ++nec;
    }
    else {
      ac_in(tot,tot+esc,tot+esc);
      if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
      for (i=cp;i!=NIL;sum=i,i=nb[i]) {
          cmstack[cmsp++]=fc[i];
          cmask[fc[i]]=1;
      }
      cps[0]=0x8000|cp;
      as[0]=sum;
      c=ESC;
      nec=0;
    }
    return c;
}

static  S16B decode_rest(U16B cp) {

    register U16B c;
    register U16B tv;
    register U16B i;
    register S16B sum,tot,esc,cf;
    
    esc=tot=0;
    for (i=cp;i!=NIL;i=nb[i]) {
      if (!cmask[fc[i]]) {
          tot+=fa[i];
          if (fa[i]<ESCTH) ++esc;
      }
    }
    esc=adj_escape_prob(esc,cp);
    tv=ac_threshold_val(tot+esc);
    for (c=cp,sum=0;;c=nb[c]) {
      if (c==NIL) break;
      if (!cmask[fc[c]]) {
          if (sum+fa[c]<=tv) sum+=fa[c];
          else {
            cf=fa[c];
            break;
          }
      }
    }
    if (c!=NIL) {
      ac_in(sum,sum+cf,tot+esc);
      if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
      as[usp]=c;
      cps[usp++]=cp;
      c=fc[c];
      ++nec;  /* must add test used in code_first() if NECMAX<5 ! */
    }
    else {
      ac_in(tot,tot+esc,tot+esc);
      if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
      for (i=cp;i!=NIL;sum=i,i=nb[i]) {
          if (!cmask[fc[i]]) {
            cmstack[cmsp++]=fc[i];
            cmask[fc[i]]=1;
          }
      }
      cps[usp]=0x8000|cp;
      as[usp++]=sum;          /* sum holds last i !! */
      c=ESC;
    }
    return c;
}

static  S16B decode_new(void) {
    
    register S16B c;
    register U16B tv,sum,tot;
    
    tot=257-cmsp;
    tv=ac_threshold_val(tot);
    for (c=sum=0;c<256;++c) {
      if (cmask[c]) continue;
      if (sum+1<=tv) ++sum;
      else break;
    }
    ac_in(sum,sum+1,tot);     
    return c;
}

#define code_byte(cp,c) (cmsp?code_rest(cp,c):code_first(cp,c))
#define decode_byte(cp) (cmsp?decode_rest(cp):decode_first(cp))

/***********************************************************************
      Encoding
***********************************************************************/

void hsc_pack(void) {

    S16B c;
    U16B cp;
    unsigned char ncmax,ncmin;
    
    init_pack();
    for (;(c=getbyte())>=0;) {
      cp=find_longest();
      ncmin=cp==NIL?0:cl[cp]+1;
      ncmax=maxclen+1;
      for(;;) {
          if (cp==NIL) {
            code_new(c);
            break;
          }             
          if (code_byte(cp,c)) {
            el_movefront(cp);
            break;
          }       
          cp=find_next();
      }
      add_model(c);
      while (ncmax>ncmin) make_context(--ncmax,c);
      move_context(c);
    }
    cp=find_longest();
    while (cp!=NIL) {
      code_byte(cp,ESC);
      cp=find_next();
    }
    code_new(ESC);
    ac_end_encode();
    hsc_cleanup();
}

/***********************************************************************
      Decoding
***********************************************************************/

void hsc_unpack(void) {

    S16B c;
    U16B cp;
    unsigned char ncmax,ncmin;
    
    init_unpack();
    for (;;) {
      cp=find_longest(); 
      ncmin=cp==NIL?0:cl[cp]+1;
      ncmax=maxclen+1;
      for(;;) {
          if (cp==NIL) {
            c=decode_new();
            break;
          }             
          if ((c=decode_byte(cp))!=ESC) {
            el_movefront(cp);
            break;
          }       
          cp=find_next();
      }
      if (c==ESC) break;
      add_model(c);
      while (ncmax>ncmin) make_context(--ncmax,c);
      putbyte(c);
      move_context(c);
    }
    flush();
    hsc_cleanup();
}


Generated by  Doxygen 1.6.0   Back to index