183 lines
5.4 KiB
Scheme
183 lines
5.4 KiB
Scheme
(load "util.scm")
|
|
|
|
(display "example - Huffman Encoding Trees") (newline)
|
|
|
|
(define (make-leaf symbol weight)
|
|
(list 'leaf symbol weight))
|
|
(define (leaf? object)
|
|
(eq? (car object) 'leaf))
|
|
(define (symbol-leaf x) (cadr x))
|
|
(define (weight-leaf x) (caddr x))
|
|
|
|
(define (make-code-tree left right)
|
|
(list left
|
|
right
|
|
(append (symbols left) (symbols right))
|
|
(+ (weight left) (weight right))))
|
|
|
|
(define (left-branch tree) (car tree))
|
|
(define (right-branch tree) (cadr tree))
|
|
|
|
(define (symbols tree)
|
|
(if (leaf? tree)
|
|
(list (symbol-leaf tree))
|
|
(caddr tree)))
|
|
|
|
(define (weight tree)
|
|
(if (leaf? tree)
|
|
(weight-leaf tree)
|
|
(cadddr tree)))
|
|
|
|
(define (decode bits tree)
|
|
(define (decode-1 bits current-branch)
|
|
(if (null? bits)
|
|
'()
|
|
(let ((next-branch
|
|
(choose-branch (car bits) current-branch)))
|
|
(if (leaf? next-branch)
|
|
(cons (symbol-leaf next-branch)
|
|
(decode-1 (cdr bits) tree))
|
|
(decode-1 (cdr bits) next-branch)))))
|
|
(decode-1 bits tree))
|
|
(define (choose-branch bit branch)
|
|
(cond ((= bit 0) (left-branch branch))
|
|
((= bit 1) (right-branch branch))
|
|
(else (error "bad bit -- CHOOSE-BRANCH" bit))))
|
|
|
|
(define (adjoin-set x set)
|
|
(cond ((null? set) (list x))
|
|
((< (weight x) (weight (car set))) (cons x set))
|
|
(else (cons (car set)
|
|
(adjoin-set x (cdr set))))))
|
|
|
|
(define (make-leaf-set pairs)
|
|
(if (null? pairs)
|
|
'()
|
|
(let ((pair (car pairs)))
|
|
(adjoin-set (make-leaf (car pair) ; symbol
|
|
(cadr pair)) ; frequency
|
|
(make-leaf-set (cdr pairs))))))
|
|
|
|
(define l '((A 4) (B 2) (C 1) (D 1)))
|
|
(display (make-leaf-set l)) (newline)
|
|
|
|
|
|
(newline) (display "ex-2.67") (newline)
|
|
|
|
(define sample-tree
|
|
(make-code-tree (make-leaf 'A 4)
|
|
(make-code-tree
|
|
(make-leaf 'B 2)
|
|
(make-code-tree (make-leaf 'D 1)
|
|
(make-leaf 'C 1)))))
|
|
|
|
; root
|
|
; / \
|
|
; A /\
|
|
; B \
|
|
; /\
|
|
; D C
|
|
|
|
(define sample-message '(0 1 1 0 0 1 0 1 0 1 1 1 0))
|
|
; A D A B B C A
|
|
|
|
(display (decode sample-message sample-tree))
|
|
|
|
|
|
|
|
(newline) (display "\nex-2.68") (newline)
|
|
|
|
(define sample-message-clear '(a d a b b c a))
|
|
|
|
|
|
(define (encode message tree)
|
|
(if (null? message)
|
|
'()
|
|
(append (encode-symbol (car message) tree)
|
|
(encode (cdr message) tree))))
|
|
|
|
(define (in? symbol items)
|
|
(cond ((null? items) #f)
|
|
((eq? symbol (car items)) #t)
|
|
(else (in? symbol (cdr items)))))
|
|
|
|
(define (encode-symbol symbol tree)
|
|
(if (leaf? tree)
|
|
'()
|
|
(let ((left (left-branch tree))
|
|
(right (right-branch tree)))
|
|
(cond ((in? symbol (symbols left)) (cons 0 (encode-symbol symbol left)))
|
|
((in? symbol (symbols right)) (cons 1 (encode-symbol symbol right)))
|
|
(else (error "bad symbol -- ENCODE SYMBOL" symbol))))))
|
|
|
|
(assert (encode sample-message-clear sample-tree) sample-message)
|
|
|
|
(display (encode '(A A A) sample-tree)) (newline)
|
|
(display (encode '(C C A) sample-tree)) (newline)
|
|
|
|
|
|
(newline) (display "ex-2.69") (newline)
|
|
|
|
(define (generate-huffman-tree pairs)
|
|
(successive-merge (make-leaf-set pairs)))
|
|
|
|
(define (successive-merge pairs)
|
|
(if (null? (cdr pairs))
|
|
(car pairs)
|
|
(let ((left (car pairs))
|
|
(right (cadr pairs))
|
|
(remaining-pairs (cddr pairs)))
|
|
(successive-merge
|
|
(adjoin-set (make-code-tree left right) remaining-pairs)))))
|
|
|
|
(define l '((A 4) (B 2) (C 1) (D 1)))
|
|
(define sample-tree (generate-huffman-tree l))
|
|
(assert (encode sample-message-clear sample-tree) sample-message)
|
|
|
|
|
|
(newline) (display "ex-2.70") (newline)
|
|
|
|
(define alphabet '((A 2) (NA 16) (BOOM 1) (SHA 3) (GET 2) (YIP 9) (JOB 2) (WAH 1)))
|
|
(define tree (generate-huffman-tree alphabet))
|
|
|
|
(define msg '(Get a job Sha na na na na na na na na Get a job Sha na na na na na na na na Wah yip yip yip yip yip yip yip yip yip Sha boom))
|
|
(define msg-enc (encode msg tree))
|
|
|
|
(display "Number of bits Huffman Encoding: ")
|
|
(display (length msg-enc)) (newline)
|
|
; 84 bits are required for the encoding.
|
|
|
|
; (log_2 8) = 3 means our fixed length alphabet one have three bits per symbol.
|
|
(display "Number of bits three bit fixed length alphabet: ")
|
|
(display (* 3 (length msg)))
|
|
(newline)
|
|
|
|
|
|
(newline) (display "ex-2.71") (newline)
|
|
|
|
; (abcde 31)
|
|
; / \
|
|
; (e 16) (abcd 15)
|
|
; / \
|
|
; (d 8) (abc 7)
|
|
; / \
|
|
; (c 4) (ab 3)
|
|
; / \
|
|
; (a 1) (b 2)
|
|
;
|
|
; For the most frequent symbol one bit is required because
|
|
; weight 2^(n-1) > sum([1, 2, ..., 2^(n-2)]).
|
|
; For the least frequent symbol the number of bits is (n - 1).
|
|
(display "[ok]")
|
|
(newline)
|
|
|
|
(newline) (display "ex-2.72") (newline)
|
|
(display "[see comment]\n")
|
|
|
|
; The worst case is that the tree is totally unbalanced in which case the
|
|
; search takes n / 2 steps and for each step the list has to be searched. In
|
|
; the best case the tree is balanced and the number of steps is log n.
|
|
|
|
; The steps for the most frequent symbol is f(n) = 1. For the least frequent
|
|
; symbol n + ... + 5 + 4 + 3 + 2 + 1 = (n * (n + 1)) / 2 steps are required.
|