Page in development...

The BBDB is the emacs hack your mother warned you about. However, I didn't find it quite insidious enough, so I've been hacking on the chunk of code below. It's not yet smart enough to let loose on my live bbdb, but it's pretty good for a bunch of pattern matching and arbitrary tokenising.

(defun bbdb-find-phone( &optional search-area ) 
  "Locate some things that look like a phone numbers.

If SEARCH-AREA is t, check the whole buffer.  If SEARCH-AREA is
something else, bbdb-find-phone will attempt to skip uninteresting
bits based on the assumption that the message is a mail/news
message. Recognised values are 'signature, 'all (equivalent to t),

How well this works is anyone's guess."

  ;; Whee! Let's look for a phone number.
        (let ((case-fold-search t)
                  phone restriction token-list)
          (if (or (eq t search-area)
                          (eq 'all search-area))
                  (message "Searching entire buffer, ^G to halt")

                ;; See if there are mail-headers. Well, To: or From: lines in
                ;; a section ending in a blank. This could be made a little
                ;; smarter; make sure all lines in the excluded region are of
                ;; the format "^\\([a-z-]+:\\|\\)\s-+" should do the trick.
                (if (eq 'body search-area)
                          (goto-char (point-min))
                          (if (re-search-forward "^$" nil t) ;; find "end of headers"
                                  (let ((eoh (point)))
                                        (if (re-search-backward "^\\(From\\|To\\):\\s-+" nil t)
                                                (narrow-to-region eoh (point-max)))))))
                (if (eq 'signature search-area)
                          ;; See if there's a .sig to look at
                          (goto-char (point-max))
                          (while (= 0 (forward-line -1))
                                (if (looking-at "^\-\- $") ;; ~standard
                                          (setq restriction 'signature)
                                          (narrow-to-region (point) (point-max)))))
                          ;; Try for a non-standard .sig marker
                          (if restriction
                                (goto-char (point-max))
                                (while (= 0 (forward-line -1))
                                  (if (looking-at "^[_+=/\|-]*$") ;; warlordy
                                                (setq restriction 'signature)
                                                (narrow-to-region (point) (point-max))))))))
                ;; Don't search through citations from people using ultra-broken
                ;; mailers, and don't search more than 32K of the message.
                ;; FIXME 32K should actually be a configurable variable
                (if restriction
                  (goto-char (point-min))
                  (if (re-search-forward "\\-Original Message\\-" nil t)
                          (narrow-to-region (point-min) (point))
                        (narrow-to-region (point-min) (min (point-max) 
                                                                                           (+ (point-min) 32000)))))
                ;; Should also skip over sections of quotations.?
          ;; Tokenise. This is pretty grungy, but does a reasonable job of
          ;; keeping useful things like phone numbers in one piece.  It
          ;; breaks URLs, mind you.  I don't want to add a URL parser to
          ;; this just yet...
          (goto-char (point-min))
          (while (re-search-forward 
                          "\\([a-z0-9.@<>-]+\\|[a-z0-9]+\\|[+(]?[0-9][0-9 .)-]+\\|[a-z-]+\\|-+\\|\\s-+\\|.\\)"
                          nil t)
                (let* ((token (bbdb-string-trim (match-string 0)))
                           (type (cond ((string-match "^\\([:/\|        =(),;<>*&-]+\\|\\)$" token)
                                                        (cons "sep" token))
                                                   ((string-match "^[a-f0-9]+$" token)
                                                        (cons "hex" token))
                                                   ((string-match "^\\(fax\\|home\\|mobile\\|ph\\(\\|one\\\)\\|tel\\(\\|ephone\\)\\|work\\|e\\-?mail\\)\\.?$" token)
                                                        ;; Clean up token into something standard.
                                                        (let (final-token)
                                                          (cond ((string-match "^\\(work\\|ph\\|tel\\)"
                                                                         (setq final-token "phone"))
                                                                        ((string-match "^fax" token)
                                                                         (setq final-token "fax"))
                                                                        ((string-match "^mobile" token)
                                                                         (setq final-token "mobile"))
                                                                        ((string-match "^home" token)
                                                                         (setq final-token "home"))
                                                                         (setq final-token (downcase token))))
                                                          (cons "tag" final-token)))
                                                   ((string-match "^[a-z-]+$" token)
                                                        (cons "word" token))
                                                   ((string-match "^[+(]?[0-9][0-9 .)-]+$" token)
                                                        (cons "phone" token))
                                                   ((string-match "^[a-z0-9]+\\(\\.[a-z0-9]+\\)+$" 
                                                        (cons "site" token)) ;; not really, but...
                                                   ((string-match "^?$" token)
                                                        (cons "email" token))
                                                        (cons "?" token)))))
                  (if (or (string= (car type) "sep")
                                  (string= (car type) "word"))
                        (setq token-list (append token-list (list type))))))
          ;;; Analyse
          (and token-list
                   (let ((token-list-copy (copy-alist token-list))
                                 (n 0)
                         (while (setq this-one (car token-list-copy))
                           (let ((tag (car this-one)) ;; tag
                                         (text (cdr this-one))) ;; text
                                 (if (string= tag "phone")
                                         (let ((prev-one (nth (- n 1) token-list))
                                                   prev-tag prev-text
                                                   (next-one (nth (+ n 1) token-list))
                                                   next-tag next-text)

                                           ;; See if it's preceeded by a tag
                                           (if prev-one
                                                         (setq prev-tag (car prev-one)
                                                                   prev-text (cdr prev-one))
                                                         (if (string= prev-tag "tag")
                                                                   (setq full-string (cons prev-text text))
                                                                   ;; nuke from token list
                                                                   ;;(setq token-list )))))

                                           (if (and next-one (not full-string))
                                                         (setq next-tag (car next-one)
                                                                   next-text (cdr next-one))
                                                         (if (string= next-tag "tag")
                                                                   (setq full-string (cons next-text text))
                                                                   ;; nuke from token list
                                                                   ;;(setq token-list )))))

                                           (if full-string
                                                 (setq full-string (cons "phone" text)))

                                           ;; some sort of default thing here?
                                 (and full-string
                                          (setq phone (append phone (list full-string))
                                                        full-string nil)))
                           (setq n (+ 1 n)) ;; track number of current element
                           (setq token-list-copy (cdr token-list-copy)))))