SICP/ex-2_67-72.scm

183 lines
5.4 KiB
Scheme

(load "shared/util.scm")
(display "example - Huffman Encoding Trees") (newline)
(define (make-leaf symbol weight)
(list 'leaf symbol weight))
(define (leaf? object)
(eq? (car object) 'leaf))
(define (symbol-leaf x) (cadr x))
(define (weight-leaf x) (caddr x))
(define (make-code-tree left right)
(list left
right
(append (symbols left) (symbols right))
(+ (weight left) (weight right))))
(define (left-branch tree) (car tree))
(define (right-branch tree) (cadr tree))
(define (symbols tree)
(if (leaf? tree)
(list (symbol-leaf tree))
(caddr tree)))
(define (weight tree)
(if (leaf? tree)
(weight-leaf tree)
(cadddr tree)))
(define (decode bits tree)
(define (decode-1 bits current-branch)
(if (null? bits)
'()
(let ((next-branch
(choose-branch (car bits) current-branch)))
(if (leaf? next-branch)
(cons (symbol-leaf next-branch)
(decode-1 (cdr bits) tree))
(decode-1 (cdr bits) next-branch)))))
(decode-1 bits tree))
(define (choose-branch bit branch)
(cond ((= bit 0) (left-branch branch))
((= bit 1) (right-branch branch))
(else (error "bad bit -- CHOOSE-BRANCH" bit))))
(define (adjoin-set x set)
(cond ((null? set) (list x))
((< (weight x) (weight (car set))) (cons x set))
(else (cons (car set)
(adjoin-set x (cdr set))))))
(define (make-leaf-set pairs)
(if (null? pairs)
'()
(let ((pair (car pairs)))
(adjoin-set (make-leaf (car pair) ; symbol
(cadr pair)) ; frequency
(make-leaf-set (cdr pairs))))))
(define l '((A 4) (B 2) (C 1) (D 1)))
(display (make-leaf-set l)) (newline)
(newline) (display "ex-2.67") (newline)
(define sample-tree
(make-code-tree (make-leaf 'A 4)
(make-code-tree
(make-leaf 'B 2)
(make-code-tree (make-leaf 'D 1)
(make-leaf 'C 1)))))
; root
; / \
; A /\
; B \
; /\
; D C
(define sample-message '(0 1 1 0 0 1 0 1 0 1 1 1 0))
; A D A B B C A
(display (decode sample-message sample-tree))
(newline) (display "\nex-2.68") (newline)
(define sample-message-clear '(a d a b b c a))
(define (encode message tree)
(if (null? message)
'()
(append (encode-symbol (car message) tree)
(encode (cdr message) tree))))
(define (in? symbol items)
(cond ((null? items) #f)
((eq? symbol (car items)) #t)
(else (in? symbol (cdr items)))))
(define (encode-symbol symbol tree)
(if (leaf? tree)
'()
(let ((left (left-branch tree))
(right (right-branch tree)))
(cond ((in? symbol (symbols left)) (cons 0 (encode-symbol symbol left)))
((in? symbol (symbols right)) (cons 1 (encode-symbol symbol right)))
(else (error "bad symbol -- ENCODE SYMBOL" symbol))))))
(assert (encode sample-message-clear sample-tree) sample-message)
(display (encode '(A A A) sample-tree)) (newline)
(display (encode '(C C A) sample-tree)) (newline)
(newline) (display "ex-2.69") (newline)
(define (generate-huffman-tree pairs)
(successive-merge (make-leaf-set pairs)))
(define (successive-merge pairs)
(if (null? (cdr pairs))
(car pairs)
(let ((left (car pairs))
(right (cadr pairs))
(remaining-pairs (cddr pairs)))
(successive-merge
(adjoin-set (make-code-tree left right) remaining-pairs)))))
(define l '((A 4) (B 2) (C 1) (D 1)))
(define sample-tree (generate-huffman-tree l))
(assert (encode sample-message-clear sample-tree) sample-message)
(newline) (display "ex-2.70") (newline)
(define alphabet '((A 2) (NA 16) (BOOM 1) (SHA 3) (GET 2) (YIP 9) (JOB 2) (WAH 1)))
(define tree (generate-huffman-tree alphabet))
(define msg '(Get a job Sha na na na na na na na na Get a job Sha na na na na na na na na Wah yip yip yip yip yip yip yip yip yip Sha boom))
(define msg-enc (encode msg tree))
(display "Number of bits Huffman Encoding: ")
(display (length msg-enc)) (newline)
; 84 bits are required for the encoding.
; (log_2 8) = 3 means our fixed length alphabet one have three bits per symbol.
(display "Number of bits three bit fixed length alphabet: ")
(display (* 3 (length msg)))
(newline)
(newline) (display "ex-2.71") (newline)
; (abcde 31)
; / \
; (e 16) (abcd 15)
; / \
; (d 8) (abc 7)
; / \
; (c 4) (ab 3)
; / \
; (a 1) (b 2)
;
; For the most frequent symbol one bit is required because
; weight 2^(n-1) > sum([1, 2, ..., 2^(n-2)]).
; For the least frequent symbol the number of bits is (n - 1).
(display "[ok]")
(newline)
(newline) (display "ex-2.72") (newline)
(display "[see comment]\n")
; The worst case is that the tree is totally unbalanced in which case the
; search takes n / 2 steps and for each step the list has to be searched. In
; the best case the tree is balanced and the number of steps is log n.
; The steps for the most frequent symbol is f(n) = 1. For the least frequent
; symbol n + ... + 5 + 4 + 3 + 2 + 1 = (n * (n + 1)) / 2 steps are required.