- Reworked the HTTP-REPLY object. Notably, the headers are now alists for
authorMatthew Mondor <mmondor@pulsar-zone.net>
Mon, 10 Sep 2012 21:47:49 +0000 (21:47 +0000)
committerMatthew Mondor <mmondor@pulsar-zone.net>
Mon, 10 Sep 2012 21:47:49 +0000 (21:47 +0000)
  easier manipulation from both HTTPd and user code, some knobs were added
  to better control the HTTP-REPLY-FLUSH behaviour, and the headers list
  to string conversion was made into a function.
- Implemented HTTP/1.1 Range (single and multiple ranges are supported).

mmsoftware/cl/server/httpd.lisp

index 6101573..6604b0d 100644 (file)
@@ -1,4 +1,4 @@
-;;;; $Id: httpd.lisp,v 1.10 2012/09/03 17:00:47 mmondor Exp $
+;;;; $Id: httpd.lisp,v 1.11 2012/09/10 21:47:49 mmondor Exp $
 
 #|
 
@@ -98,8 +98,10 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
           #:mime-reload
           #:http-reply
           #:make-http-reply
-          #:http-reply-nocache
-          #:http-reply-header-add
+          #:http-reply-header-set
+          #:http-reply-header-get
+          #:http-reply-header-unset
+          #:http-reply-header-set-nocache
           #:http-reply-content-add
           #:http-reply-log-time
           #:http-reply-flush
@@ -110,12 +112,13 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
           #:url-encode
           #:req-var
           #:with-http-let
+          #:range-validate
           #:debug-feature))
 
 (in-package :httpd)
 
 (defparameter *rcsid*
-  "$Id: httpd.lisp,v 1.10 2012/09/03 17:00:47 mmondor Exp $")
+  "$Id: httpd.lisp,v 1.11 2012/09/10 21:47:49 mmondor Exp $")
 
 (defparameter *server-version*
   (let ((parts (string-split *rcsid*
@@ -400,40 +403,87 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     (http-reply-signal)
   ())
 
-;;; XXX Accesses dynamic variable symbols more than once
-(defstruct http-reply
-  (date (server-time-rfc) :type string)
-  (code 200 :type real)
-  (description "Ok" :type string)
-  (headers (list
-           (format nil "Server: ~A" *server-version*)
-           "Accept-Ranges: bytes")
-          :type list)
-  (content '() :type list)
-  (mime-type "text/html" :type string)
-  (charset (if *request*
-              (vhost-charset (http-request-vhost *request*))
-              :utf-8)
-          :type keyword)
-  (protocol (if *request*
-               (let ((protocol (http-request-protocol *request*)))
-                 (if protocol
-                     protocol
-                     0.9))
-               0.9)
-           :type float)
-  (no-keepalive nil))
-
-(defun http-reply-nocache (reply)
-  (nconc (http-reply-headers reply)
-        (list
-         "Expires: Mon, 26 Jul 1997 05:00:00 GMT"
-         (format nil "Last-Modified: ~A" (http-reply-date reply))
-         "Cache-Control: no-cache, must-revalidate"
-         "Pragma: no-cache")))
-
-(defun http-reply-header-add (reply fmt &rest fmt-args)
-  (push (apply #'format nil fmt fmt-args) (http-reply-headers reply)))
+(defstruct (http-reply (:constructor %make-http-reply))
+  (date                (server-time-rfc)       :type string)
+  (code                200                     :type real)
+  (description "Ok"                    :type string)
+  (mime-type   "text/html"             :type string)
+  (charset     :utf-8                  :type keyword)
+  (headers     '()                     :type list)
+  (content     '()                     :type list)
+  (protocol    0.9                     :type float)
+  (no-keepalive        nil                     :type boolean)
+  (flush-log   t                       :type boolean)
+  (flush-len   t                       :type boolean)
+  (flush-signal        t                       :type boolean))
+
+(defun make-http-reply (&rest keys)
+  (let* ((reply (apply #'%make-http-reply keys))
+        (request *request*))
+    (setf (http-reply-headers reply)
+         (list `("Server" . ,*server-version*)
+               '("Accept-Ranges" . "bytes")))
+    (when request
+      (setf (http-reply-charset reply)
+            (vhost-charset (http-request-vhost request))
+           (http-reply-protocol reply)
+            (let ((protocol (http-request-protocol request)))
+              (if protocol
+                  protocol
+                  0.9))
+            (http-reply-charset reply)
+             (vhost-charset (http-request-vhost request))))
+    reply))
+
+(defun http-reply-header-set (reply &rest pairs)
+  (with-accessors ((headers http-reply-headers)) reply
+    (loop
+       for pair in pairs
+       do
+        (destructuring-bind (key fmt &rest fmt-args) pair
+          (let* ((match (assoc key headers :test #'string-equal))
+                 (line (if fmt-args
+                           (apply #'format nil fmt fmt-args)
+                           fmt)))
+            (if match
+                (rplacd match line)
+                (setf headers (acons key line headers)))))))
+  (values))
+
+(defun http-reply-header-get (reply key)
+  (assoc key (http-reply-headers reply) :test #'string-equal))
+
+(defun http-reply-header-unset (reply key)
+  (with-accessors ((headers http-reply-headers)) reply
+    (let ((match (assoc key headers :test #'string-equal)))
+      (if match
+         (rplacd match nil))))
+  (values))
+
+(defun http-reply-header-set-nocache (reply)
+  ;; We can't use PAIRLIS as we're later iterating through the list
+  ;; which must be a set, and PAIRLIS may insert or append...
+  (http-reply-header-set
+   reply
+   '("Expires" "Mon, 26 Jul 1997 05:00:00 GMT")
+   `("Last-Modified" ,(http-reply-date reply))
+   '("Cache-Control" "no-cache, must-revalidate")
+   '("Pragma" "no-cache")))
+
+(defvar *crlf* (concatenate 'string '(#\Return #\LineFeed)))
+
+(defun http-reply-headers-string (reply)
+  (with-output-to-string (out)
+    (loop
+       with crlf = *crlf*
+       with alist = (sort (http-reply-headers reply) ; XXX Unecessary to
+                         #'string-lessp :key #'car) ; sort, only fancy
+       for item in alist
+       for (key . line) = item
+       when line
+       do
+        (format out "~:(~A~): ~A~A" key line crlf)
+       finally (write-string crlf out))))
 
 (defun http-reply-content-add (reply content)
   (push content (http-reply-content reply)))
@@ -465,19 +515,19 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                          (loop
                             for s in content
                             sum (length s) into len of-type fixnum
-                            finally (return len))))
-        (crlf (format nil "~C~C" #\Return #\LineFeed)))
+                            finally (return len)))))
 
     (when request
       (let ((connection *connection*))
        (when (http-reply-no-keepalive reply)
-         (http-request-disable-keepalive request))
+         (setf (http-request-keep-alive request) nil))
        (macrolet ((field (f &optional (type :string))
                     `(let ((%f ,f))
                        ,(if (eq type :string)
                             `(if %f %f "-")
                             `(if (zerop %f) "-" %f)))))
-         (when (config-request-log config)
+         (when (and (config-request-log config)
+                    (http-reply-flush-log reply))
            (log-line-nostamp "~X ~A - - ~A ~A ~S ~A ~A \"~A\" \"~A\""
                              (connection-session connection)
                              (connection-address-string connection)
@@ -491,33 +541,32 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     (when (> (http-reply-protocol reply) 0.9)
       (with-accessors ((headers http-reply-headers)) reply
-       (push (format nil "Date: ~A" (http-reply-date reply)) headers)
-       (push (format nil "Content-Length: ~D" content-len) headers)
-       (push (format nil "Content-Type: ~A; charset=~A"
-                     (http-reply-mime-type reply)
-                     (symbol-name (http-reply-charset reply)))
-             headers)
+       (http-reply-header-set
+        reply
+        (list "Date" (http-reply-date reply)))
+       (when (http-reply-flush-len reply)
+         (http-reply-header-set
+          reply
+          (list "Content-Length" "~D" content-len)
+          (list "Content-Type" "~A; charset=~A"
+                (http-reply-mime-type reply)
+                (symbol-name (http-reply-charset reply)))))
        (if (and request
                 (http-request-keep-alive request))
            (when (= 1.0 (http-request-protocol request))
-             (push (format nil "Keep-Alive: timeout=~D, max=~D"
-                           (config-request-keepalive-timeout config)
-                           (config-request-keepalive-max config))
-                   headers)
-             (push "Connection: Keep-Alive" headers))
-           (push "Connection: close" headers))
-       ;; Must push last so that it gets displayed first
-       (push (format nil "HTTP/1.1 ~A ~A"
-                     (http-reply-code reply)
-                     (http-reply-description reply))
-             headers)
-       (write-string
-        (concatenate 'string
-                     (reduce #'(lambda (a b)
-                                 (concatenate 'string a crlf b))
-                             headers)
-                     crlf crlf)
-        stream)))
+             (http-reply-header-set
+              reply
+              (list "Keep-Alive" "timeout=~D, max=~D"
+                    (config-request-keepalive-timeout config)
+                    (config-request-keepalive-max config))
+              '("Connection" "Keep-Alive")))
+           (http-reply-header-set reply
+                                  '("Connection" "close")))
+       (format stream "HTTP/1.1 ~A ~A~A"
+               (http-reply-code reply)
+               (http-reply-description reply)
+               *crlf*)
+       (write-string (http-reply-headers-string reply) stream)))
     (unless size
       (loop
         with rcontent = (reverse content)
@@ -525,7 +574,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
         do
           (write-sequence s stream)))
     (finish-output stream))
-  (when (or (null size) (zerop size))
+  (when (and (http-reply-flush-signal reply)
+            (or (null size) (zerop size)))
     (error (if (http-reply-no-keepalive reply)
               'http-reply-signal-no-keepalive
               'http-reply-signal)))
@@ -539,9 +589,11 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                                :description message
                                :no-keepalive t))
        (description (if fmt
-                        (apply #'format nil fmt fmt-args)
+                        (if fmt-args
+                            (apply #'format nil fmt fmt-args)
+                            nil)
                         nil)))
-    (http-reply-nocache reply)
+    (http-reply-header-set-nocache reply)
     (http-reply-content-add
      reply
      (let ((title (html-escape (format nil "~A - ~A" code message))))
@@ -558,8 +610,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   (let ((reply (make-http-reply :code 501
                                :description "Method Not Implemented"
                                :no-keepalive t)))
-    (http-reply-nocache reply)
-    (http-reply-header-add reply "Allow: GET, HEAD, POST")
+    (http-reply-header-set-nocache reply)
+    (http-reply-header-set reply '("Allow" "GET, HEAD, POST"))
     (http-reply-content-add
      reply
      (let ((title "501 - Method Not Implemented"))
@@ -583,8 +635,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     (unless path
       (http-error stream 403 "Forbidden"
        "You do not have the permission to access this resource."))
-    (http-reply-nocache reply)
-    (http-reply-header-add reply "Location: ~A" movedto)
+    (http-reply-header-set-nocache reply)
+    (http-reply-header-set reply (list "Location" "~A" movedto))
     (http-reply-content-add
      reply
      (do-html nil
@@ -847,7 +899,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
               lines))))
 
 ;;; Request parsing preprocessor.
-;;; Extracts query from the first request line and coalesces continuating
+;;; Extracts query from the first request line and coalesces continuing
 ;;; header lines.  Returns the request line as first value and the list
 ;;; of preprocessed lines as second value.
 (defun http-request-parse-1 (lines)
@@ -900,6 +952,116 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 (defun http-request-disable-keepalive (request)
   (setf (http-request-keep-alive request) nil))
 
+;;; Parses an HTTP Range of the format [bytes=][from]-[to][,[from]-[to]...]
+;;; and returns a list of (or null integer) pairs.  The range is not
+;;; checked for sanity yet, as this also requires knowing the size of
+;;; the resource.  NIL is returned if the range(s) could not be parsed.
+;;; User code may use RANGE-VALIDATE to perform further validation and
+;;; processing.
+(defun range-parse (line)
+  (let* ((pos (search "bytes=" line :test #'string-equal))
+        (spos (if pos 6 0)))
+    ;; LOOP has better inlining potential than MAP below and is as clean
+    (loop
+       with l = (loop
+                  with v = (string-split line
+                                         :separators '(#\,)
+                                         :start spos
+                                         :max 16)
+                  for p across v
+                  collect (concatenate 'string " " p " "))
+       for p in l
+       collect (loop
+                 with v = (string-split p
+                                        :separators '(#\-)
+                                        :trim-parts '(#\Space)
+                                        :max 2)
+                 for n across v
+                 collect (parse-integer n :junk-allowed t)))))
+
+;;; This part of RFC 2616 seems to have been designed for humans, yet
+;;; software must deal with it, but this is a common theme in HTTP, like
+;;; the parsing of RFC 822/850/1123 dates intended for humans.
+;;; XXX Eventually cleanup this messy function
+(defun range-validate (req size)
+  "Attempts to validate client-provided range(s), if supplied.
+SIZE specifies the size of the resource to serve (i.e. a file length).
+On success, returns a list of pre-processed range pair(s) filled with actual
+byte offsets.  On failure, returns :NOT-SATISFIABLE or NIL (empty list).
+The returned range has any overlapping ranges optimized and any redundant
+contiguous ranges coalesced for the caller.  If the result only consists of
+a single range covering the whole document, NIL is returned."
+  (unless (http-request-range req)
+    (return-from range-validate nil))
+  ;; First convert human-friendly ambiguous ranges to actual byte ranges
+  (let* ((byte-ranges
+         (loop
+            with ranges = (http-request-range req)
+            for r in ranges
+            for (from to) = r
+            do
+              (cond ((null from)
+                     (when (null to)
+                       (return nil))
+                     (if (>= to size)
+                         (setf from 0
+                               to (1- size))
+                         (setf from (- size to)
+                               to (1- size))))
+                    ((null to)
+                     (when (null from)
+                       (return nil))
+                     (setf to (1- size)))
+                    (t
+                     (when (>= to size)
+                       (setf to (1- size)))))
+              (unless (<= from to)
+                (return nil))
+              (unless (< to size)
+                (return :not-satisfiable))
+            collect (list from to)))
+
+        ;; Optimize any overlapping ranges, eliminating redundant ones
+        (optimized-ranges
+         (loop
+            for r1 in byte-ranges
+            for r2 = (loop
+                        for r3 in byte-ranges
+                        when (and (<= (first r1) (second r3))
+                                  (>= (second r1) (first r3)))
+                        collect r3)
+            for optimized = (list (reduce #'min r2 :key #'first)
+                                  (reduce #'max r2 :key #'second))
+            unless (member optimized final
+                           :test #'(lambda (a b)
+                                     (and (= (first a) (first b))
+                                          (= (second a) (second b)))))
+            collect optimized into final
+            finally (return (sort final #'(lambda (a b)
+                                            (< (first a) (first b)))))))
+
+        ;; Coalesce any redundant contiguous ranges together
+        (coalesced-ranges
+         (loop
+            for r on optimized-ranges
+            when (and (cdr r)
+                      (= (second (car r))
+                         (1- (first (cadr r)))))
+            collect (prog1
+                        (list (first (car r)) (second (cadr r)))
+                      (setf r (cdr r)))
+            else
+            collect (car r))))
+
+    ;; If there only remains a single range which covers the whole
+    ;; document, ignore it.
+    (if (and coalesced-ranges
+            (destructuring-bind (from to) (first coalesced-ranges)
+              (and (= 0 from) (= (1- size) to))))
+       nil
+       coalesced-ranges)))
+
+
 ;;; List of headers we care about and functions to fill them.
 ;;; We later on fill a hash table using this list for fast lookups.
 ;;; CLOS could have been used instead after interning a keyword symbol
@@ -946,7 +1108,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                (server-time-rfc-parse v))))
     ("range"
      ,#'(lambda (o v)
-         (setf (http-request-range o) v)))
+         (setf (http-request-range o) (range-parse v))))
     ("cookie"
      ,#'(lambda (o v)
          (property-set (http-request-vars-cookie o) v)))))
@@ -959,6 +1121,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
          *header-list*)
     ht))
 
+
 ;;; Reads and parses POST data request if any
 ;;; XXX Should at least also support "multipart/form-data" enctype
 (defun http-post-parse (stream)
@@ -1100,7 +1263,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                      (subseq line (1+ (position #\: line :start
                                                 (length var)))))
           for fun = (gethash var header-table)
-          when fun do (funcall fun req val)))
+          when fun do (ignore-errors ; Extra parsing carefulness
+                        (funcall fun req val))))
 
       ;; Separate path from query variables; fill in GET variables if any.
       (let* ((path (http-request-path req))
@@ -1173,11 +1337,9 @@ REQ is bound to VAR."
     (and debug (position keyword debug :test #'eq))))
 
 (defun beep ()
-  (handler-case
-      (with-open-file (s "/dev/speaker" :direction :output)
-       (write-string "O1L15D" s))
-    (t ()
-      nil)))
+  (ignore-errors
+    (with-open-file (s "/dev/speaker" :direction :output)
+      (write-string "O1L15D" s))))
 
 (defun dump-vars (ht)
   (with-output-to-string (out)
@@ -1202,12 +1364,12 @@ REQ is bound to VAR."
                 "the system is developed interactively as spare time "
                 "permits.  A particularity of Lisp is that it can be "
                 "used as a scripting language interactively, with wanted "
-                "modified clode blocks reapplied in the live image. "
+                "modified code blocks reapplied in the live image. "
                 "These code blocks may be reapplied as compiled bytecode "
                 "for interpretation (or in this case, using ECL, be "
-                "recompiled efficiently to C and linked as a dynamic "
-                "loadable module, and reloaded immediately when typing "
-                "C-c C-c on a code block in Emacs).")
+                "recompiled efficiently to C, linked as a dynamic "
+                "loadable module and reloaded immediately, which typing "
+                "C-c C-c on a code block in Emacs automates).")
             (:p
              "Follow " (:a :href "/" "this link") " to proceed to a "
              "mirror of my site hosted on this test server.")
@@ -1477,29 +1639,110 @@ REQ is bound to VAR."
                  (http-reply-description reply) "Precondition Failed")
            (http-reply-flush reply stream 0)))
 
-       ;; Range
-       ;; XXX 416 Requested Range Not Satisfiable
-
        ;; Finally send file (except for HEAD)
-       (http-reply-header-add reply "Last-Modified: ~A" lastmod)
+       (http-reply-header-set reply (list "Last-Modified" "~A" lastmod))
+
        (handler-case
            (progn
              (with-open-file (in truepath
                                  :direction :input
                                  :element-type '(unsigned-byte 8))
-               (http-reply-flush reply stream (file-length in))
-               (unless (eq :head (http-request-method req))
-                 (loop
-                    with seq = *buffer*
-                    with seqsize of-type fixnum = (array-dimension seq 0)
-                    for len of-type fixnum = (read-sequence seq in)
-                    do (write-sequence seq stream :end len)
-                    while (= seqsize len))
-                 (finish-output stream))))
+               (let* ((size (file-length in))
+                      (ranges (range-validate req size)))
+                 (when (eq :not-satisfiable ranges)
+                   (http-error stream 416
+                               "Requested Range Not Satisfiable"))
+
+ (cond ((null ranges)
+
+       ;; Optimized for full file transfer
+       (http-reply-flush reply stream size)
+       (unless (eq :head (http-request-method req))
+         (loop
+            with seq = *buffer*
+            with seqsize of-type fixnum = (array-dimension seq 0)
+            for len of-type fixnum = (read-sequence seq in)
+            do (write-sequence seq stream :end len)
+            while (= seqsize len))
+         (finish-output stream)))
+
+       (t
+
+       ;; Transfer ranges
+       (let ((multiple-ranges-p (> (length ranges) 1))
+             (offset 0)
+             (boundary-string nil))
+         (cond (multiple-ranges-p
+                ;; XXX Perhaps use BASE64 instead of 16 below
+                (setf boundary-string (format nil "~64,'0X" (random #xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF))
+                      (http-reply-flush-signal reply) nil
+                      (http-reply-flush-len reply) nil
+                      (http-reply-code reply) 206
+                      (http-reply-description reply) "Partial Content")
+                (http-reply-header-set
+                 reply
+                 (list "Content-Type"
+                       "multipart/byteranges; boundary=~A"
+                       boundary-string))
+                (http-reply-flush reply stream)
+                (http-reply-header-set
+                 reply
+                 (list "Content-Type" "~A; charset=~A"
+                       (http-reply-mime-type reply)
+                       (symbol-name (http-reply-charset reply))))
+                (http-reply-header-unset reply "Date")
+                (http-reply-header-unset reply "Last-Modified")
+                (http-reply-header-unset reply "Server"))
+               (t
+                (setf (http-reply-code reply) 206
+                      (http-reply-description reply) "Partial Content")
+                (destructuring-bind (from to) (first ranges)
+                  (http-reply-header-set reply
+                                         (list "Content-Range"
+                                               "bytes ~D-~D/~D"
+                                               from to size))
+                  (http-reply-flush reply stream (- to from)))))
+         (loop
+            with orig-size = size
+            with crlf = *crlf*
+            for range in ranges
+            for (from to) = range
+            for size = (if (= to from)
+                           1
+                           (- to from))
+            when (/= offset from) do (progn
+                                       (file-position in from)
+                                       (setf offset from))
+            do
+              (when multiple-ranges-p
+                (format stream "--~A~A" boundary-string crlf)
+                (http-reply-header-set
+                 reply
+                 (list "Content-Range" "bytes ~D-~D/~D"
+                       from to orig-size)
+                 (list "Content-Length" size))
+                (write-string (http-reply-headers-string reply) stream))
+              (loop
+                 with seq = *buffer*
+                 with seqsize of-type fixnum = (array-dimension seq 0)
+                 for len of-type fixnum = (read-sequence
+                                           seq in
+                                           :end (min seqsize size))
+                 do
+                   (write-sequence seq stream :end len)
+                   (decf size len)
+                 while (and (/= 0 len) (= seqsize len)))
+              (when multiple-ranges-p
+                (write-string crlf stream)) ; XXX Is this legal?
+              (finish-output stream))))))))
+
+         ;; Handled errors
          (file-error ()
            (http-error stream 403 "Forbidden"
-             "You do not have the permission to access \"~A\"."
-             (path-virtual path))))))))
+                       "You do not have the permission to access \"~A\"."
+                       (path-virtual path)))))))
+  (values))
+
 
 ;;; Actual entry point from SERVER
 (defun http-serve (connection)