(load "shared/util.scm") (display "example - Huffman Encoding Trees") (newline) (define (make-leaf symbol weight) (list 'leaf symbol weight)) (define (leaf? object) (eq? (car object) 'leaf)) (define (symbol-leaf x) (cadr x)) (define (weight-leaf x) (caddr x)) (define (make-code-tree left right) (list left right (append (symbols left) (symbols right)) (+ (weight left) (weight right)))) (define (left-branch tree) (car tree)) (define (right-branch tree) (cadr tree)) (define (symbols tree) (if (leaf? tree) (list (symbol-leaf tree)) (caddr tree))) (define (weight tree) (if (leaf? tree) (weight-leaf tree) (cadddr tree))) (define (decode bits tree) (define (decode-1 bits current-branch) (if (null? bits) '() (let ((next-branch (choose-branch (car bits) current-branch))) (if (leaf? next-branch) (cons (symbol-leaf next-branch) (decode-1 (cdr bits) tree)) (decode-1 (cdr bits) next-branch))))) (decode-1 bits tree)) (define (choose-branch bit branch) (cond ((= bit 0) (left-branch branch)) ((= bit 1) (right-branch branch)) (else (error "bad bit -- CHOOSE-BRANCH" bit)))) (define (adjoin-set x set) (cond ((null? set) (list x)) ((< (weight x) (weight (car set))) (cons x set)) (else (cons (car set) (adjoin-set x (cdr set)))))) (define (make-leaf-set pairs) (if (null? pairs) '() (let ((pair (car pairs))) (adjoin-set (make-leaf (car pair) ; symbol (cadr pair)) ; frequency (make-leaf-set (cdr pairs)))))) (define l '((A 4) (B 2) (C 1) (D 1))) (display (make-leaf-set l)) (newline) (newline) (display "ex-2.67") (newline) (define sample-tree (make-code-tree (make-leaf 'A 4) (make-code-tree (make-leaf 'B 2) (make-code-tree (make-leaf 'D 1) (make-leaf 'C 1))))) ; root ; / \ ; A /\ ; B \ ; /\ ; D C (define sample-message '(0 1 1 0 0 1 0 1 0 1 1 1 0)) ; A D A B B C A (display (decode sample-message sample-tree)) (newline) (display "\nex-2.68") (newline) (define sample-message-clear '(a d a b b c a)) (define (encode message tree) (if (null? message) '() (append (encode-symbol (car message) tree) (encode (cdr message) tree)))) (define (in? symbol items) (cond ((null? items) #f) ((eq? symbol (car items)) #t) (else (in? symbol (cdr items))))) (define (encode-symbol symbol tree) (if (leaf? tree) '() (let ((left (left-branch tree)) (right (right-branch tree))) (cond ((in? symbol (symbols left)) (cons 0 (encode-symbol symbol left))) ((in? symbol (symbols right)) (cons 1 (encode-symbol symbol right))) (else (error "bad symbol -- ENCODE SYMBOL" symbol)))))) (assert (encode sample-message-clear sample-tree) sample-message) (display (encode '(A A A) sample-tree)) (newline) (display (encode '(C C A) sample-tree)) (newline) (newline) (display "ex-2.69") (newline) (define (generate-huffman-tree pairs) (successive-merge (make-leaf-set pairs))) (define (successive-merge pairs) (if (null? (cdr pairs)) (car pairs) (let ((left (car pairs)) (right (cadr pairs)) (remaining-pairs (cddr pairs))) (successive-merge (adjoin-set (make-code-tree left right) remaining-pairs))))) (define l '((A 4) (B 2) (C 1) (D 1))) (define sample-tree (generate-huffman-tree l)) (assert (encode sample-message-clear sample-tree) sample-message) (newline) (display "ex-2.70") (newline) (define alphabet '((A 2) (NA 16) (BOOM 1) (SHA 3) (GET 2) (YIP 9) (JOB 2) (WAH 1))) (define tree (generate-huffman-tree alphabet)) (define msg '(Get a job Sha na na na na na na na na Get a job Sha na na na na na na na na Wah yip yip yip yip yip yip yip yip yip Sha boom)) (define msg-enc (encode msg tree)) (display "Number of bits Huffman Encoding: ") (display (length msg-enc)) (newline) ; 84 bits are required for the encoding. ; (log_2 8) = 3 means our fixed length alphabet one have three bits per symbol. (display "Number of bits three bit fixed length alphabet: ") (display (* 3 (length msg))) (newline) (newline) (display "ex-2.71") (newline) ; (abcde 31) ; / \ ; (e 16) (abcd 15) ; / \ ; (d 8) (abc 7) ; / \ ; (c 4) (ab 3) ; / \ ; (a 1) (b 2) ; ; For the most frequent symbol one bit is required because ; weight 2^(n-1) > sum([1, 2, ..., 2^(n-2)]). ; For the least frequent symbol the number of bits is (n - 1). (display "[ok]") (newline) (newline) (display "ex-2.72") (newline) (display "[see comment]\n") ; The worst case is that the tree is totally unbalanced in which case the ; search takes n / 2 steps and for each step the list has to be searched. In ; the best case the tree is balanced and the number of steps is log n. ; The steps for the most frequent symbol is f(n) = 1. For the least frequent ; symbol n + ... + 5 + 4 + 3 + 2 + 1 = (n * (n + 1)) / 2 steps are required.