#!/usr/bin/env python ## compressor (Golomb-Rice) for a sparse file, with many 0s and few 1s. ## Has one parameter m, which defines via M=2^m the typical number of consecutive 0s expected; ## The Golomb-Rice code can be viewed as an approximate arithmetic code ## (c) David J.C. MacKay ## License: GPL http://www.gnu.org/copyleft/gpl.html ## Originates from: http://www.inference.phy.cam.ac.uk/mackay/itprnn/code/c/compress/ """ Compressor (Golomb) for a sparse file, with many 0s and few 1s. Has one parameter m, which defines via M=2^m the typical number of consecutive 0s expected; The Golomb code can be viewed as an approximate arithmetic code. The Golomb code with parameter m is also identical to the Huffman run-length code for a particular value of the P(1). This package provides functions encode(string,m) and decode(string,m) """ import sys, string , re from regex_example import re_show, re_delete from IntegerCodes import dec_to_bin verbose=0; mGolombDefault = 7 def encode(string, mGolomb=mGolombDefault): MGolomb = (1<= MGolomb ) : ans = ans+"1" ## sending "1" encodes "0"xM if verbose: print ans r = 0 counter +=1 pass pass elif ( s=='1' ) : tot1 += 1 ## sending "0" encodes the fact that, next, there are fewer than Mx"0" on the way, followed by a "1" ## print out current value of r using m bits counter +=1 ans = ans + "0" + dec_to_bin( r , mGolomb ) ## sending "r" in binary encodes "0"xr followed by 1. if verbose: print ans counter +=mGolomb r = 0 pass else : ## we ignore carriage returns, etc pass pass ## To make a self-delimiting file, we need to send the final value of r, and have the receiver ## know to remove the final "1". As encoder, we act as if an extra '1' were received. ans = ans + "0" + dec_to_bin( r , mGolomb ) ; counter +=1 + mGolomb ## sys.stderr.write("\nShannon information content of the file is %d log(f) + %d log(1-f) = %8.3f\n",tot0,tot1, (tot0*log(0.99)+tot1*log(0.01))/log(0.5) ) ## fprintf(stderr,"Decompress with GolombDecode %d < filename\n",tot0+tot1 ) ; ## print ans N=tot0+tot1 print >> sys.stderr, N,"bits read (",tot0," 0s,",tot1," 1s); ",counter,"bits written" return ans pass ## prints a string of r zeroes, PRECEDED by "1" if the second flag is set. def printzeroes( r , needtoprint1 , ans ) : ## here we wish to modify the value of needtoprint1, so we send it as an array pointer if ( needtoprint1[0] ) : if(verbose): print ans ; print ans[0] ; pass ans[0] = ans[0] + "1" needtoprint1[0] = 0 pass if r>0: ans[0] = ans[0] + "0"*r pass def join(alist,sep='\n'): return sep.join(alist) def decode(mystring, mGolomb=mGolombDefault): if(verbose): print "decoding ", mystring MGolomb = (1<