(in-package :cl-user) (defpackage #:net.acdw.clarence (:nicknames #:clarence) (:use :cl) (:import-from :cl-ppcre #:split #:regex-replace-all) (:export #:*tags* #:line #:make-line #:copy-line #:line-p #:line-tag #:line-content #:link #:make-link #:copy-link #:link-p #:link-tag #:link-content #:link-href #:stanza #:make-stanza #:copy-stanza #:stanza-p #:stanza-tag #:stanza-content #:verbatim #:copy-verbatim #:verbatim-p #:verbatim-tag #:verbatim-content #:verbatim-open #:verbatim-close #:verbatim-alt #:document #:make-document #:copy-document #:document-p #:document-content #:blank #:blankp #:parse-line #:parse-lines #:parse #:parse-string #:parse-file #:writer #:make-writer #:copy-writer #:writer-p #:writer-name #:writer-line-format #:writer-inline-format #:writer-stanza-format #:writer-document-format #:writer-tag-formats #:writer-line-preproc #:writer-inline-preproc #:writer-stanza-preproc #:writer-document-preproc #:writer-tag-preprocs #:writer-line-postproc #:writer-inline-postproc #:writer-stanza-postproc #:writer-document-postproc #:writer-tag-postprocs #:defwriter #:*current-writer* #:*writers* #:tag-of #:format-of #:preprocs-of #:postprocs-of #:line-string #:stanza-string #:document-string #:fold-text #:unfold-text #:combine-like #:character-wrap #:gemtext #:html #:escape-html #:escape-html/pre #:add-inline-markup #:verbatim-preproc)) (in-package :net.acdw.clarence) ;;; Parameters (defparameter *tags* '(("###" . :h3) ("##" . :h2) ("#" . :h1) ("=>" . :link) ("> " . :quote) ("* " . :list) ("```" . :verbatim)) "Alist of source sigil to internal tags. It's important that if tags start with the same characters, the longer tag is first.") ;;; Gemtext types (defstruct line tag content) (defstruct (stanza (:constructor %make-stanza)) tag content) (defstruct document content) ;; Derived types (defstruct (link (:include line) (:constructor %make-link)) href) (defstruct (verbatim (:include stanza) (:constructor %make-verbatim)) alt open close) ;; Blank lines (defvar blank (make-line :tag :blank)) (defun blankp (line) "Is LINE blank?" (and (line-p line) (or (eq :blank (line-tag line)) (zerop (length (line-content line)))))) ;; Constructors (defun make-link (&key (content "") (href "")) "Make an instance of LINK." (%make-link :tag :link :content content :href href)) (defun make-stanza (lines) (let ((tag (line-tag (car lines)))) (if (eq :verbatim tag) (%make-verbatim :tag :verbatim :content ; the first and last lines are ``` (nreverse (cdr (nreverse (cdr lines)))) :alt (line-content (car lines))) (%make-stanza :tag tag :content lines)))) ;;; Reading (defun parse-line (input-string &key (verbatimp nil)) "Parse a LINE from INPUT-STRING. If VERBATIMP is non-nil, return a verbatim line." (let* ((tag (tag-of input-string)) (text (chomp (subseq input-string (length (car (rassoc tag *tags*))))))) (cond (verbatimp (make-line :tag :verbatim :content input-string)) ((eq :link tag) (let* ((%split-line (split "\\s" text)) (href (chomp (car %split-line))) (text (if (cdr %split-line) (chomp (format nil "~{~A~^ ~}" (cdr %split-line))) href))) (make-link :href href :content text))) ((zerop (length text)) (make-line :tag :blank)) (t (make-line :tag tag :content text))))) (defun parse-lines (&optional (input *standard-input*)) "Parse a list of LINEs from INPUT." (loop for this = (read-line input nil) with verbatimp = nil until (null this) collect (parse-line this :verbatimp verbatimp) when (eq :verbatim (tag-of this)) do (setf verbatimp (not verbatimp)))) (defun group-stanzas (lines) "Group LINES into a list of STANZAs." (flet ((%accumulate-stanzas (stanzas line) (let ((stanza (car stanzas))) (cond ;; A naked line goes into a stanza by itself. ((not stanza) (list (list line) nil)) ;; Two links in a row -- start a new stanza. ((and (eq :link (line-tag line)) (eq :link (line-tag (car stanza))) (cadr stanza) (null (line-tag (cadr stanza)))) (list* (list line (car stanza)) (make-stanza (nreverse (cdr stanza))) (cdr stanzas))) ;; Same line type -- continue this stanza. ((eq (line-tag line) (and (car stanza) (line-tag (car stanza)))) (list* (cons line stanza) (cdr stanzas))) ;; Links within paragraphs --- continue the stanza. ((or (and (eq :link (line-tag line)) (null (line-tag (car stanza))) (not (blankp (car stanza)))) (and (null (line-tag line)) (eq :link (line-tag (car stanza))) (cadr stanza) (null (line-tag (cadr stanza))))) (list* (cons line stanza) (cdr stanzas))) ;; Different tags --- start a new stanza. (t (list* (list line) (make-stanza (nreverse stanza)) (cdr stanzas))))))) (let ((stanzas (remove-if (lambda (st) (and (stanza-p st) (eq :blank (stanza-tag st)))) (reduce #'%accumulate-stanzas lines :initial-value ())))) (remove-if #'null (nreverse (cons (make-stanza (nreverse (car stanzas))) (cdr stanzas))))))) (defun parse (source) (make-document :content (group-stanzas (parse-lines source)))) (defun parse-string (string) (with-input-from-string (s string) (parse s))) (defun parse-file (file) (with-open-file (s file) (parse s))) ;;; Writing ;; Type (defstruct writer "A WRITER holds information needed to write a document. *-FORMAT slots hold format specifiers for elements. A bare string is taken as a format specifier to be applied to that elements' CONTENT; a list is expected to have a CAR that's the specifier and a CDR full of slot names for the given object. T is substituted with the object's formatted content. These slots are for plain (nil-tagged) text. TAG-FORMATS hold differnet formats for each tag. This slot is an alist where each element is (TAG . PLIST). PLIST keys are either :LINE, :INLINE, or :STANZA, and the values are as in *-FORMAT slots. The PREPROC and POSTPROC slots of a WRITER are similar to FORMAT slots, except they hold preprocessing and postprocessing functions, respectively. Preprocessors are applied to the raw object before formatting it; postprocessors are applied to the resulting, formatted text." name ;; FORMATs control how element text is emitted. STANZA and DOCUMENT formats ;; should handle lists. (line-format "~&~A") inline-format (stanza-format "~&~{~A~^~&~}~%~%") document-format tag-formats ;; PREPROCs are lists of functions that operate, in order, on the raw data ;; structure before it's formatted. line-preproc inline-preproc stanza-preproc document-preproc tag-preprocs ;; POSTPROCs are lists of functions that operate, in order, on the formatted ;; text of the given structure. line-postproc inline-postproc stanza-postproc document-postproc tag-postprocs) (defmacro defwriter (name &rest make-writer-args) (let ((name (intern (format nil "*~A-WRITER*" name))) (keyword (intern (format nil "~A" name) :keyword))) `(progn (defvar ,name (make-writer :name ,keyword ,@make-writer-args)) (pushnew ,name *writers*)))) (defun writer (x) "Ensure X is a writer. If X is a writer, return it. If it's a string-designator, find the writer named X in *WRITERs*. If it doesn't exist or is another type, error." (etypecase x (writer x) ((or symbol string character) ; string-designator (or (find x *writers* :key #'writer-name :test #'string-equal))))) ;; Parameters (defparameter *current-writer* (make-writer) "The WRITER that's currently being used.") (defparameter *writers* (list *current-writer*) "A list of known WRITERs.") ;; Queries (defun tag-of (input) "Return the tag of INPUT line." (loop for (sigil . tag) in *tags* when (uiop:string-prefix-p sigil input) do (return tag))) (defun format-of (level tag writer) "Get the format of TAG at LEVEL in WRITER. If a given format doesn't exist, it falls through in this order: 1. -line -> line 2. -stanza -> stanza 3. -inline -> -line -> inline -> line 4. document -> stanza" (let* ((formats (writer-tag-formats writer)) (tag-formats (cdr (assoc tag formats))) (target (getf tag-formats level))) (uiop:ensure-list (if target target (case level (:line ; 1. (writer-line-format writer)) (:stanza ; 2. -- or ts -> s -> tl -> l ? (or (getf tag-formats :stanza) (writer-stanza-format writer))) (:inline ; 3. (or (getf tag-formats :line) (writer-inline-format writer) (writer-line-format writer))) (:document ; 4. (or (writer-document-format writer) (writer-stanza-format writer)))))))) (defun preprocs-of (level tag writer) "Return a function composing all the preprocessors of TAG at LEVEL." (let ((fns (uiop:ensure-list (or (getf (cdr (assoc tag (writer-tag-preprocs writer))) level) (slot-value writer (ecase level (:line 'line-preproc) (:inline 'inline-preproc) (:stanza 'stanza-preproc) (:document 'document-preproc))))))) (apply #'compose #'identity fns))) (defun postprocs-of (level tag writer) "Return a function composing all the postprocessors of TAG at LEVEL." (let ((fns (uiop:ensure-list (or (getf (cdr (assoc tag (writer-tag-postprocs writer))) level) (slot-value writer (ecase level (:line 'line-postproc) (:inline 'inline-postproc) (:stanza 'stanza-postproc) (:document 'document-postproc))))))) (apply #'compose #'identity fns))) ;; Output strings (defun line-string (line &key inlinep (writer (or *current-writer* (make-writer)))) "Return a string of LINE's written representation. If INLINEP is non-nil, format according to :inline, else :line." (let* ((writer (writer writer)) (level (if inlinep :inline :line)) (format-args (format-of level (line-tag line) writer)) (line (funcall (preprocs-of level (line-tag line) writer) line))) (funcall (postprocs-of level (line-tag line) writer) (apply #'format nil (car format-args) (mapcar (lambda (slot) (if (eq slot t) (line-content line) (slot-value line slot))) (or (cdr format-args) '(t))))))) (defun stanza-string (stanza &key (writer (or *current-writer* (make-writer)))) "Return a string of STANZA's written representation." (let* ((writer (writer writer)) (stanza (funcall (preprocs-of :stanza (stanza-tag stanza) writer) stanza)) (format-args (format-of :stanza (stanza-tag stanza) writer)) (formatted-lines (loop for line in (stanza-content stanza) collect (line-string line :inlinep (not (eq (line-tag line) (stanza-tag stanza))) :writer writer)))) (funcall (postprocs-of :stanza (stanza-tag stanza) writer) (apply #'format nil (car format-args) (mapcar (lambda (slot) (if (eq slot t) formatted-lines (slot-value stanza slot))) (or (cdr format-args) '(t))))))) (defun document-string (document &key (writer (or *current-writer* (make-writer)))) "Return a string of DOCUMENT's written representation." (let* ((writer (writer writer)) (document (funcall (preprocs-of :document nil writer) document)) (format-args (format-of :document nil writer)) (formatted-stanzas (loop for stanza in (document-content document) collect (stanza-string stanza :writer writer)))) (funcall (postprocs-of :document nil writer) (apply #'format nil (car format-args) (mapcar (lambda (slot) (if (eq slot t) formatted-stanzas (slot-value document slot))) (or (cdr format-args) '(t))))))) ;;; Processors (defun fold-text (string &key (width 72) (prefix 0)) ;; TODO: fold-text-preserving-gemini-links "Fold (word-wrap) STRING to WIDTH." (let ((words (split "\\s+" string)) (current-width 0)) (loop for word in words for word-width = (length word) if (>= (+ word-width current-width) width) do (progn (format t "~&~,,v@a " prefix word) (setf current-width (+ word-width 1 prefix))) else do (progn (format t "~a " word) (incf current-width (1+ word-width)))))) (defun unfold-text (string) "Replace newlines in STRING with spaces." (regex-replace-all "\\n" (chomp string) " ")) (defun combine-like (stanza) "Combine lines with similar tags in STANZA together." (let ((combined (reduce (lambda (lines line) (let ((this (car lines))) (if (and this (not (link-p line)) (eq (line-tag this) (line-tag line))) (cons (make-line :tag (line-tag line) :content (format nil "~A ~A" (line-content this) (line-content line))) (cdr lines)) (cons line lines)))) (stanza-content stanza) :initial-value ()))) (make-stanza (nreverse combined)))) (defun character-wrap (string char open close) "Convert runs of 'CHAR ... CHAR' in STRING to 'OPEN ... CLOSE'. CHAR must be on a word boundary. Example: (character-wrap \"some *body* once told me\" #\* \"\" \"<\/b>\") => \"some body once told me\"." (ppcre:regex-replace-all `(:sequence (:register (:alternation :whitespace-char-class :start-anchor #\- #\[ #\( #\{ #\" #\' #\<)) ,char (:register (:sequence :non-whitespace-char-class (:non-greedy-repetition 0 nil :everything) :non-whitespace-char-class)) ,char (:register (:alternation :whitespace-char-class :end-anchor #\, #\. #\? #\! #\; #\: #\- #\] #\) #\} #\" #\' #\>))) string (format nil "\\1~A\\2~A\\3" open close))) ;;; Predefined writers (defwriter gemtext :tag-formats '((:h3 :line "~&### ~A") (:h2 :line "~&## ~A") (:h1 :line "~&# ~A") (:quote :line "~&> ~A") (:list :line "~&* ~A") (:link :line ("~&=> ~A ~A" href t)) (:verbatim :line "~&~A" :stanza ("~&```~A~%~{~A~%~}```~&" alt t))) :stanza-preproc '(combine-like) :line-postproc '(unfold-text)) (defwriter html :stanza-format "~&

~%~{~A~%~}

" :document-format "~{~A~%~}" :tag-formats '((:verbatim ;; See VERBATIM-PREPROC :stanza ("~&~A~{~A~^~%~}~A" open t close)) (:link :line ("~&
  • ~A
  • " href t) :stanza "~&
      ~%~{~A~%~}
    " :inline ("~A " href t)) (:list :line "~&
  • ~A
  • " :stanza "~&
      ~%~{~A~%~}
    ") (:quote :stanza "~&
    ~%~{~A~%~}
    ") (:h1 :stanza "~&

    ~{~A~^ ~}

    ") (:h2 :stanza "~&

    ~{~A~^ ~}

    ") (:h3 :stanza "~&

    ~{~A~^ ~}

    ")) :line-preproc '(add-inline-markup escape-html/pre) :tag-preprocs '((:verbatim :stanza (verbatim-preproc)))) (defun escape-html (text) "Escape HTML entities in TEXT." (ppcre:regex-replace-all "[&<>]" text (lambda (target-string start end match-start match-end reg-starts reg-ends) (declare (ignore start end match-end reg-starts reg-ends)) (case (char target-string match-start) (#\< "<") (#\> ">") (#\& "&"))))) (defun escape-html/pre (line) (if (link-p line) (make-link :href (link-href line) :content (escape-html (link-content line))) (make-line :tag (line-tag line) :content (escape-html (line-content line))))) (defun add-inline-markup (line) "Convert *inline* =markup= to HTML in a LINE." (let* ((text (line-content line)) (text (character-wrap text #\* "" "")) (text (character-wrap text #\_ "" "")) (text (character-wrap text #\= "" ""))) (if (link-p line) (make-link :content text :href (link-href line)) (make-line :tag (line-tag line) :content text)))) (defun verbatim-preproc (verb) "Give VERB the correct OPEN and CLOSE tags." (let ((alt (verbatim-alt verb)) (open "
    ")
            (close "
    ") (preproc '(escape-html/pre))) (unless (zerop (length alt)) (case (char alt 0) ((#\. #\#) ; .classes, #id (multiple-value-bind (classes id) (loop for x in (ppcre:all-matches-as-strings "[#.][^#.]+" alt) if (eq #\. (char x 0)) collect (subseq x 1) into classes if (eq #\# (char x 0)) collect (subseq x 1) into id finally (return (values classes id))) (setf open (format nil "" (if classes " class=\"~{~A~^ ~}\"" "~*") classes (if id " id=\"~{~A~^ ~}\"" "~*") id)))) (#\< ; custom HTML tag (if (uiop:string-prefix-p "<>" alt) ;; ```<> means interpret the verbatim block as raw HTML (setf open "" close "" preproc nil) (setf open alt close (ppcre:regex-replace-all "<(\\S+)[^>]+>" open "")))) (#\| ; pipe to an external process (let* ((stanza-text (loop for ln in (verbatim-content verb) collect (line-content ln) into ss finally (return (format nil "~{~A~^~%~}" ss)))) (command-line (ppcre:split "\\s" (subseq alt 1))) (piped-text (ignore-errors (with-input-from-string (s stanza-text) (uiop:run-program command-line :input s :output :string))))) ;; Here it's easier to just return a new stanza altogether (when piped-text (setf open (format nil "~%" alt) close "") (setf verb (%make-verbatim :tag :verbatim :content (with-input-from-string (s piped-text) (parse-lines s))))))) (t ))) (%make-verbatim :tag :verbatim :alt (verbatim-alt verb) :open open :close close :content (mapcar (apply #'compose #'identity preproc) (verbatim-content verb))))) ;;; Utilities (defun chomp (string) "Trim whitespace off both ends of STRING." (string-trim '(#\newline #\space #\tab) string)) (defun compose (function &rest more-functions) ; yoinked from alexandria "Returns a function composed of FUNCTION and MORE-FUNCTIONS that applies its arguments to to each in turn, starting from the rightmost of MORE-FUNCTIONS, and then calling the next one with the primary value of the last." (reduce (lambda (f g) (lambda (&rest arguments) (declare (dynamic-extent arguments)) (funcall f (apply g arguments)))) more-functions :initial-value function))