code / clarence / clarence.lisp

1(in-package :cl-user)
2
3(defpackage #:net.acdw.clarence
4  (:nicknames #:clarence)
5  (:use :cl)
6  (:import-from :cl-ppcre
7                #:split
8                #:regex-replace-all)
9  (:export #:*tags*
10           #:line #:make-line #:copy-line #:line-p
11           #:line-tag #:line-content
12           #:link #:make-link #:copy-link #:link-p
13           #:link-tag #:link-content #:link-href
14           #:stanza #:make-stanza #:copy-stanza #:stanza-p
15           #:stanza-tag #:stanza-content
16           #:verbatim #:copy-verbatim #:verbatim-p
17           #:verbatim-tag #:verbatim-content
18           #:verbatim-open #:verbatim-close #:verbatim-alt
19           #:document #:make-document #:copy-document #:document-p
20           #:document-content
21           #:blank #:blankp
22           #:parse-line #:parse-lines #:parse #:parse-string #:parse-file
23           #:writer #:make-writer #:copy-writer #:writer-p
24           #:writer-name #:writer-line-format #:writer-inline-format
25           #:writer-stanza-format #:writer-document-format #:writer-tag-formats
26           #:writer-line-preproc #:writer-inline-preproc
27           #:writer-stanza-preproc #:writer-document-preproc
28           #:writer-tag-preprocs
29           #:writer-line-postproc #:writer-inline-postproc
30           #:writer-stanza-postproc #:writer-document-postproc
31           #:writer-tag-postprocs
32           #:defwriter #:*current-writer* #:*writers*
33           #:tag-of #:format-of #:preprocs-of #:postprocs-of
34           #:line-string #:stanza-string #:document-string
35           #:fold-text #:unfold-text #:combine-like #:character-wrap
36           #:gemtext
37           #:html #:escape-html #:escape-html/pre
38           #:add-inline-markup #:verbatim-preproc))
39
40(in-package :net.acdw.clarence)
41
42;;; Parameters
43
44(defparameter *tags*
45  '(("###" . :h3)
46    ("##" . :h2)
47    ("#" . :h1)
48    ("=>" . :link)
49    ("> " . :quote)
50    ("* " . :list)
51    ("```" . :verbatim))
52  "Alist of source sigil to internal tags.
53It's important that if tags start with the same characters, the longer tag is
54first.")
55
56;;; Gemtext types
57
58(defstruct line
59  tag content)
60
61(defstruct (stanza (:constructor %make-stanza))
62  tag content)
63
64(defstruct document
65  content)
66
67;; Derived types
68
69(defstruct (link (:include line)
70                 (:constructor %make-link))
71  href)
72
73(defstruct (verbatim (:include stanza)
74                     (:constructor %make-verbatim))
75  alt open close)
76
77;; Blank lines
78
79(defvar blank (make-line :tag :blank))
80
81(defun blankp (line)
82  "Is LINE blank?"
83  (and (line-p line)
84       (or (eq :blank (line-tag line))
85           (zerop (length (line-content line))))))
86
87;; Constructors
88
89(defun make-link (&key (content "") (href ""))
90  "Make an instance of LINK."
91  (%make-link :tag :link :content content :href href))
92
93(defun make-stanza (lines)
94  (let ((tag (line-tag (car lines))))
95    (if (eq :verbatim tag)
96        (%make-verbatim :tag :verbatim
97                        :content        ; the first and last lines are ```
98                        (nreverse (cdr (nreverse (cdr lines))))
99                        :alt (line-content (car lines)))
100        (%make-stanza :tag tag
101                      :content lines))))
102
103;;; Reading
104
105(defun parse-line (input-string &key (verbatimp nil))
106  "Parse a LINE from INPUT-STRING.
107If VERBATIMP is non-nil, return a verbatim line."
108  (let* ((tag (tag-of input-string))
109         (text (chomp (subseq input-string (length (car (rassoc tag *tags*)))))))
110    (cond
111      (verbatimp
112       (make-line :tag :verbatim :content input-string))
113      ((eq :link tag)
114       (let* ((%split-line (split "\\s" text))
115              (href (chomp (car %split-line)))
116              (text (if (cdr %split-line)
117                        (chomp (format nil "~{~A~^ ~}" (cdr %split-line)))
118                        href)))
119         (make-link :href href :content text)))
120      ((zerop (length text))
121       (make-line :tag :blank))
122      (t
123       (make-line :tag tag :content text)))))
124
125(defun parse-lines (&optional (input *standard-input*))
126  "Parse a list of LINEs from INPUT."
127  (loop for this = (read-line input nil)
128        with verbatimp = nil
129        until (null this)
130        collect (parse-line this :verbatimp verbatimp)
131        when (eq :verbatim (tag-of this))
132          do (setf verbatimp (not verbatimp))))
133
134(defun group-stanzas (lines)
135  "Group LINES into a list of STANZAs."
136  (flet ((%accumulate-stanzas (stanzas line)
137           (let ((stanza (car stanzas)))
138             (cond
139               ;; A naked line goes into a stanza by itself.
140               ((not stanza)
141                (list (list line) nil))
142               ;; Two links in a row -- start a new stanza.
143               ((and (eq :link (line-tag line))
144                     (eq :link (line-tag (car stanza)))
145                     (cadr stanza)
146                     (null (line-tag (cadr stanza))))
147                (list* (list line (car stanza))
148                       (make-stanza (nreverse (cdr stanza)))
149                       (cdr stanzas)))
150               ;; Same line type -- continue this stanza.
151               ((eq (line-tag line)
152                    (and (car stanza) (line-tag (car stanza))))
153                (list* (cons line stanza)
154                       (cdr stanzas)))
155               ;; Links within paragraphs --- continue the stanza.
156               ((or (and (eq :link (line-tag line))
157                         (null (line-tag (car stanza)))
158                         (not (blankp (car stanza))))
159                    (and (null (line-tag line))
160                         (eq :link (line-tag (car stanza)))
161                         (cadr stanza)
162                         (null (line-tag (cadr stanza)))))
163                (list* (cons line stanza)
164                       (cdr stanzas)))
165               ;; Different tags --- start a new stanza.
166               (t
167                (list* (list line)
168                       (make-stanza (nreverse stanza))
169                       (cdr stanzas)))))))
170    (let ((stanzas (remove-if (lambda (st)
171                                (and (stanza-p st)
172                                     (eq :blank (stanza-tag st))))
173                              (reduce #'%accumulate-stanzas lines
174                                      :initial-value ()))))
175      (remove-if #'null
176                 (nreverse (cons (make-stanza (nreverse (car stanzas)))
177                                 (cdr stanzas)))))))
178
179(defun parse (source)
180  (make-document :content (group-stanzas (parse-lines source))))
181
182(defun parse-string (string)
183  (with-input-from-string (s string)
184    (parse s)))
185
186(defun parse-file (file)
187  (with-open-file (s file)
188    (parse s)))
189
190;;; Writing
191
192;; Type
193
194(defstruct writer
195  "A WRITER holds information needed to write a document.
196
197*-FORMAT slots hold format specifiers for elements.  A bare string is taken as a
198format specifier to be applied to that elements' CONTENT; a list is expected to
199have a CAR that's the specifier and a CDR full of slot names for the given
200object.  T is substituted with the object's formatted content.  These slots are
201for plain (nil-tagged) text.
202
203TAG-FORMATS hold differnet formats for each tag.  This slot is an
204alist where each element is (TAG . PLIST).  PLIST keys are either :LINE,
205:INLINE, or :STANZA, and the values are as in *-FORMAT slots.
206
207The PREPROC and POSTPROC slots of a WRITER are similar to FORMAT slots, except
208they hold preprocessing and postprocessing functions, respectively.
209Preprocessors are applied to the raw object before formatting it; postprocessors
210are applied to the resulting, formatted text."
211  name
212  ;; FORMATs control how element text is emitted. STANZA and DOCUMENT formats
213  ;; should handle lists.
214  (line-format "~&~A")
215  inline-format
216  (stanza-format "~&~{~A~^~&~}~%~%")
217  document-format
218  tag-formats
219  ;; PREPROCs are lists of functions that operate, in order, on the raw data
220  ;; structure before it's formatted.
221  line-preproc
222  inline-preproc
223  stanza-preproc
224  document-preproc
225  tag-preprocs
226  ;; POSTPROCs are lists of functions that operate, in order, on the formatted
227  ;; text of the given structure.
228  line-postproc
229  inline-postproc
230  stanza-postproc
231  document-postproc
232  tag-postprocs)
233
234(defmacro defwriter (name &rest make-writer-args)
235  (let ((name (intern (format nil "*~A-WRITER*" name)))
236        (keyword (intern (format nil "~A" name) :keyword)))
237    `(progn (defvar ,name
238              (make-writer :name ,keyword ,@make-writer-args))
239            (pushnew ,name *writers*))))
240
241(defun writer (x)
242  "Ensure X is a writer.
243If X is a writer, return it.  If it's a string-designator, find the writer named
244X in *WRITERs*.  If it doesn't exist or is another type, error."
245  (etypecase x
246    (writer x)
247    ((or symbol string character)       ; string-designator
248     (or (find x *writers*
249               :key #'writer-name
250               :test #'string-equal)))))
251
252;; Parameters
253
254(defparameter *current-writer* (make-writer)
255  "The WRITER that's currently being used.")
256
257(defparameter *writers* (list *current-writer*)
258  "A list of known WRITERs.")
259
260;; Queries
261
262(defun tag-of (input)
263  "Return the tag of INPUT line."
264  (loop for (sigil . tag) in *tags*
265        when (uiop:string-prefix-p sigil input)
266          do (return tag)))
267
268(defun format-of (level tag writer)
269  "Get the format of TAG at LEVEL in WRITER.
270If a given format doesn't exist, it falls through in this order:
2711. <tag>-line -> line
2722. <tag>-stanza -> stanza
2733. <tag>-inline -> <tag>-line -> inline -> line
2744. document -> stanza"
275  (let* ((formats (writer-tag-formats writer))
276         (tag-formats (cdr (assoc tag formats)))
277         (target (getf tag-formats level)))
278    (uiop:ensure-list
279     (if target
280         target
281         (case level
282           (:line                        ; 1.
283            (writer-line-format writer))
284           (:stanza                      ; 2. -- or ts -> s -> tl -> l ?
285            (or (getf tag-formats :stanza)
286                (writer-stanza-format writer)))
287           (:inline                      ; 3.
288            (or (getf tag-formats :line)
289                (writer-inline-format writer)
290                (writer-line-format writer)))
291           (:document                   ; 4.
292            (or (writer-document-format writer)
293                (writer-stanza-format writer))))))))
294
295(defun preprocs-of (level tag writer)
296  "Return a function composing all the preprocessors of TAG at LEVEL."
297  (let ((fns (uiop:ensure-list
298              (or (getf (cdr (assoc tag (writer-tag-preprocs writer)))
299                        level)
300                  (slot-value writer
301                              (ecase level
302                                (:line 'line-preproc)
303                                (:inline 'inline-preproc)
304                                (:stanza 'stanza-preproc)
305                                (:document 'document-preproc)))))))
306    (apply #'compose #'identity fns)))
307
308(defun postprocs-of (level tag writer)
309  "Return a function composing all the postprocessors of TAG at LEVEL."
310  (let ((fns (uiop:ensure-list
311              (or (getf (cdr (assoc tag (writer-tag-postprocs writer)))
312                        level)
313                  (slot-value writer
314                              (ecase level
315                                (:line 'line-postproc)
316                                (:inline 'inline-postproc)
317                                (:stanza 'stanza-postproc)
318                                (:document 'document-postproc)))))))
319    (apply #'compose #'identity fns)))
320
321;; Output strings
322
323(defun line-string (line &key inlinep (writer (or *current-writer*
324                                                  (make-writer))))
325  "Return a string of LINE's written representation.
326If INLINEP is non-nil, format according to :inline, else :line."
327  (let* ((writer (writer writer))
328         (level (if inlinep :inline :line))
329         (format-args (format-of level (line-tag line) writer))
330         (line (funcall (preprocs-of level (line-tag line) writer)
331                        line)))
332    (funcall (postprocs-of level (line-tag line) writer)
333             (apply #'format nil (car format-args)
334                    (mapcar (lambda (slot)
335                              (if (eq slot t)
336                                  (line-content line)
337                                  (slot-value line slot)))
338                            (or (cdr format-args) '(t)))))))
339
340(defun stanza-string (stanza &key (writer (or *current-writer*
341                                              (make-writer))))
342  "Return a string of STANZA's written representation."
343  (let* ((writer (writer writer))
344         (stanza (funcall (preprocs-of :stanza (stanza-tag stanza) writer)
345                          stanza))
346         (format-args (format-of :stanza (stanza-tag stanza) writer))
347         (formatted-lines
348           (loop for line in (stanza-content stanza)
349                 collect
350                 (line-string line
351                              :inlinep (not (eq (line-tag line)
352                                                (stanza-tag stanza)))
353                              :writer writer))))
354    (funcall (postprocs-of :stanza (stanza-tag stanza) writer)
355             (apply #'format nil (car format-args)
356                    (mapcar (lambda (slot)
357                              (if (eq slot t)
358                                  formatted-lines
359                                  (slot-value stanza slot)))
360                            (or (cdr format-args) '(t)))))))
361
362(defun document-string (document &key (writer (or *current-writer*
363                                                  (make-writer))))
364  "Return a string of DOCUMENT's written representation."
365  (let* ((writer (writer writer))
366         (document (funcall (preprocs-of :document nil writer)
367                            document))
368         (format-args (format-of :document nil writer))
369         (formatted-stanzas
370           (loop for stanza in (document-content document)
371                 collect (stanza-string stanza :writer writer))))
372    (funcall (postprocs-of :document nil writer)
373             (apply #'format nil (car format-args)
374                    (mapcar (lambda (slot)
375                              (if (eq slot t)
376                                  formatted-stanzas
377                                  (slot-value document slot)))
378                            (or (cdr format-args) '(t)))))))
379
380;;; Processors
381
382(defun fold-text (string &key (width 72) (prefix 0))
383  ;; TODO: fold-text-preserving-gemini-links
384  "Fold (word-wrap) STRING to WIDTH."
385  (let ((words (split "\\s+" string))
386        (current-width 0))
387    (loop for word in words
388          for word-width = (length word)
389          if (>= (+ word-width current-width) width)
390            do (progn (format t "~&~,,v@a " prefix word)
391                      (setf current-width (+ word-width 1 prefix)))
392          else
393            do (progn (format t "~a " word)
394                      (incf current-width (1+ word-width))))))
395
396(defun unfold-text (string)
397  "Replace newlines in STRING with spaces."
398  (regex-replace-all "\\n" (chomp string) " "))
399
400(defun combine-like (stanza)
401  "Combine lines with similar tags in STANZA together."
402  (let ((combined
403          (reduce
404           (lambda (lines line)
405             (let ((this (car lines)))
406               (if (and this
407                        (not (link-p line))
408                        (eq (line-tag this)
409                            (line-tag line)))
410                   (cons (make-line :tag (line-tag line)
411                                    :content
412                                    (format nil "~A ~A"
413                                            (line-content this)
414                                            (line-content line)))
415                         (cdr lines))
416                   (cons line lines))))
417           (stanza-content stanza)
418           :initial-value ())))
419    (make-stanza (nreverse combined))))
420
421(defun character-wrap (string char open close)
422  "Convert runs of 'CHAR ... CHAR' in STRING to 'OPEN ... CLOSE'.
423CHAR must be on a word boundary.  Example:
424(character-wrap \"some *body* once told me\" #\* \"<b>\" \"<\/b>\") =>
425\"some <b>body</b> once told me\"."
426  (ppcre:regex-replace-all `(:sequence
427                             (:register
428                              (:alternation
429                               :whitespace-char-class
430                               :start-anchor
431                               #\- #\[ #\( #\{ #\" #\' #\{{CONTENT}}lt;))
432                             ,char
433                             (:register
434                              (:sequence
435                               :non-whitespace-char-class
436                               (:non-greedy-repetition 0 nil :everything)
437                               :non-whitespace-char-class))
438                             ,char
439                             (:register
440                              (:alternation
441                               :whitespace-char-class
442                               :end-anchor
443                               #\, #\. #\? #\! #\; #\: #\- #\] #\) #\}
444                               #\" #\' #\{{CONTENT}}gt;)))
445                           string
446                           (format nil "\\1~A\\2~A\\3" open close)))
447
448;;; Predefined writers
449
450(defwriter gemtext
451  :tag-formats '((:h3
452                  :line "~&### ~A")
453                 (:h2
454                  :line "~&## ~A")
455                 (:h1
456                  :line "~&# ~A")
457                 (:quote
458                  :line "~&> ~A")
459                 (:list
460                  :line "~&* ~A")
461                 (:link
462                  :line ("~&=> ~A ~A" href t))
463                 (:verbatim
464                  :line "~&~A"
465                  :stanza ("~&```~A~%~{~A~%~}```~&"
466                           alt t)))
467  :stanza-preproc '(combine-like)
468  :line-postproc '(unfold-text))
469
470(defwriter html
471  :stanza-format "~&<p>~%~{~A~%~}</p>"
472  :document-format "~{~A~%~}"
473  :tag-formats '((:verbatim
474                  ;; See VERBATIM-PREPROC
475                  :stanza ("~&~A~{~A~^~%~}~A" open t close))
476                 (:link
477                  :line ("~&<li><a href=\"~A\">~A</a></li>" href t)
478                  :stanza "~&<ul>~%~{~A~%~}</ul>"
479                  :inline ("<a href=\"~A\">~A</a> " href t))
480                 (:list
481                  :line "~&<li>~A</li>"
482                  :stanza "~&<ul>~%~{~A~%~}</ul>")
483                 (:quote
484                  :stanza "~&<blockquote>~%~{~A~%~}</blockquote>")
485                 (:h1
486                  :stanza "~&<h1>~{~A~^ ~}</h1>")
487                 (:h2
488                  :stanza "~&<h2>~{~A~^ ~}</h2>")
489                 (:h3
490                  :stanza "~&<h3>~{~A~^ ~}</h3>"))
491  :line-preproc '(add-inline-markup
492                  escape-html/pre)
493  :tag-preprocs '((:verbatim
494                   :stanza (verbatim-preproc))))
495
496(defun escape-html (text)
497  "Escape HTML entities in TEXT."
498  (ppcre:regex-replace-all
499   "[&<>]" text
500   (lambda (target-string start end match-start match-end reg-starts reg-ends)
501     (declare (ignore start end match-end reg-starts reg-ends))
502     (case (char target-string match-start)
503       (#\{{CONTENT}}lt; "&lt;")
504       (#\{{CONTENT}}gt; "&gt;")
505       (#\{{CONTENT}}amp; "&amp;")))))
506
507(defun escape-html/pre (line)
508  (if (link-p line)
509      (make-link :href (link-href line)
510                 :content (escape-html (link-content line)))
511      (make-line :tag (line-tag line)
512                 :content (escape-html (line-content line)))))
513
514(defun add-inline-markup (line)
515  "Convert *inline* =markup= to HTML in a LINE."
516  (let* ((text (line-content line))
517         (text (character-wrap text #\* "<b>" "</b>"))
518         (text (character-wrap text #\_ "<i>" "</i>"))
519         (text (character-wrap text #\= "<code>" "</code>")))
520    (if (link-p line)
521        (make-link :content text :href (link-href line))
522        (make-line :tag (line-tag line) :content text))))
523
524(defun verbatim-preproc (verb)
525  "Give VERB the correct OPEN and CLOSE tags."
526  (let ((alt (verbatim-alt verb))
527        (open "<pre><code>")
528        (close "</code></pre>")
529        (preproc '(escape-html/pre)))
530
531    (unless (zerop (length alt))
532      (case (char alt 0)
533        ((#\. #\#)                      ; .classes, #id
534         (multiple-value-bind (classes id)
535             (loop for x in (ppcre:all-matches-as-strings "[#.][^#.]+" alt)
536                   if (eq #\. (char x 0))
537                     collect (subseq x 1) into classes
538                   if (eq #\# (char x 0))
539                     collect (subseq x 1) into id
540                   finally (return (values classes id)))
541           (setf open (format nil "<pre~@?~@?><code>"
542                              (if classes " class=\"~{~A~^ ~}\"" "~*")
543                              classes
544                              (if id " id=\"~{~A~^ ~}\"" "~*")
545                              id))))
546
547        (#\{{CONTENT}}lt;                            ; custom HTML tag
548         (if (uiop:string-prefix-p "<>" alt)
549             ;; ```<> means interpret the verbatim block as raw HTML
550             (setf open ""
551                   close ""
552                   preproc nil)
553             (setf open alt
554                   close (ppcre:regex-replace-all "<(\\S+)[^>]+>" open "</\\1>"))))
555
556        (#\|                            ; pipe to an external process
557         (let* ((stanza-text
558                  (loop for ln in (verbatim-content verb)
559                        collect (line-content ln) into ss
560                        finally (return (format nil "~{~A~^~%~}" ss))))
561                (command-line
562                  (ppcre:split "\\s" (subseq alt 1)))
563                (piped-text
564                  (ignore-errors
565                   (with-input-from-string (s stanza-text)
566                     (uiop:run-program command-line :input s :output :string)))))
567           ;; Here it's easier to just return a new stanza altogether
568           (when piped-text
569             (setf open (format nil "<!-- ~A -->~%" alt)
570                   close "")
571             (setf verb (%make-verbatim
572                         :tag :verbatim
573                         :content (with-input-from-string (s piped-text)
574                                    (parse-lines s)))))))
575        (t
576         )))
577
578    (%make-verbatim
579     :tag :verbatim
580     :alt (verbatim-alt verb)
581     :open open
582     :close close
583     :content (mapcar (apply #'compose #'identity preproc)
584                      (verbatim-content verb)))))
585
586;;; Utilities
587
588(defun chomp (string)
589  "Trim whitespace off both ends of STRING."
590  (string-trim '(#\newline #\space #\tab) string))
591
592(defun compose (function &rest more-functions) ; yoinked from alexandria
593  "Returns a function composed of FUNCTION and MORE-FUNCTIONS that applies its
594arguments to to each in turn, starting from the rightmost of MORE-FUNCTIONS,
595and then calling the next one with the primary value of the last."
596  (reduce (lambda (f g)
597            (lambda (&rest arguments)
598              (declare (dynamic-extent arguments))
599              (funcall f (apply g arguments))))
600          more-functions
601          :initial-value function))