code / clarence / clarence.lisp

1(defpackage :net.acdw.clarence
2  (:nicknames #:clarence)
3  (:use :cl)
4  (:import-from :cl-ppcre
5                #:split
6                #:regex-replace-all)
7  (:import-from #:ppcre
8                #:regex-replace-all))
9
10(in-package :net.acdw.clarence)
11
12(defparameter *tags*
13  '(("###" . :h3)
14    ("##" . :h2)
15    ("#" . :h1)
16    ("=>" . :link)
17    ("> " . :quote)
18    ("* " . :list)
19    ("```" . :verbatim))
20  "Alist of source sigil to internal tags.")
21
22(defparameter *default-line-format* "~&~A"
23  "The default format-specifier for lines.")
24
25(defparameter *default-stanza-format* "~&~{~A~^~%~}~%~%"
26  "The default format-specifier for stanzas.
27Should contain a looping construct.")
28
29(defparameter *default-document-format* "~{~A~&~}"
30  "The default format-specifier for documents.
31Should contain a looping construct.")
32
33;;; Gemtext documents
34
35;; Basic types
36
37(defstruct tagged
38  "A tagged structure."
39  (tag nil :type (or null keyword))
40  (content nil))
41
42(defstruct (line (:include tagged))
43  "A tagged line of gemtext content.")
44
45(defstruct (stanza (:include tagged (content () :type t))
46                   (:constructor %make-stanza))
47  "A tagged group of lines.
48Stanzas are usually groups of all the same tag, but plain stanzas can include
49singular links.  A blank line always separates stanzas.
50A stanza's tag should be equal to the tag of its first line.")
51
52(defstruct document
53  "A group of stanzas representing a complete document.
54Note that there is no tag."
55  content)
56
57;; Derived types
58
59(defstruct (link (:include line (tag :link :read-only t)))
60  "A link to another document or resource, given by HREF."
61  (href nil :type (or null string)))
62
63(defstruct (verbatim (:include stanza (tag :verbatim :read-only t))
64                     (:constructor %make-verbatim))
65  "A verbatim block of text.
66This is the only stanza type that might contain a blank line.  ALT is optional
67alt-text, and OPEN and CLOSE are used by writers internally."
68  (alt nil :type (or null string))
69  (open nil :type (or null string))
70  (close nil :type (or null string)))
71
72;; Blank lines
73
74(defvar blank (make-line :tag :blank)
75  "A blank line.")
76
77(defmethod blankp ((line line))
78  "Is LINE blank?"
79  (or (tag-equal line :blank)
80      (null (line-content line))
81      (zerop (length (line-content line)))))
82
83(defmethod blankp ((stanza stanza))
84  "Is STANZA blank?"
85  (or (null (stanza-content stanza))
86      (tag-equal stanza :blank)))
87
88(defmethod blankp ((doc document))
89  (null (document-content doc)))
90
91(defmethod blankp ((list list))
92  (null list))
93
94;; Tag equality
95
96(defmethod tag-equal ((x null) y) nil)
97
98(defmethod tag-equal (x (y null)) nil)
99
100(defmethod tag-equal ((tag1 symbol) (tag2 symbol))
101  (eq tag1 tag2))
102
103(defmethod tag-equal ((tag symbol) (x tagged))
104  (eq tag (slot-value x 'tag)))
105
106(defmethod tag-equal ((x tagged) (tag symbol))
107  (eq tag (slot-value x 'tag)))
108
109(defmethod tag-equal ((x tagged) (y tagged))
110  (eq (slot-value x 'tag) (slot-value y 'tag)))
111
112(defun tag-null (x)
113  (tag-equal x nil))
114
115;; Constructors
116
117(defun make-stanza (lines)
118  "Make a STANZA from LINES.
119If the stanza is verbatim, remove the first and last lines from its
120content---those are each \"```\"."
121  (if (tag-equal (car lines) :verbatim)
122      (%make-verbatim :content (cdr lines)
123                      :alt (line-content (car lines)))
124      (%make-stanza :tag (line-tag (car lines))
125                    :content lines)))
126
127;;; Reading
128
129(defun tag-of (string)
130  "Return the tag of STRING by consulting *TAGS*."
131  (loop for (sigil . tag) in *tags*
132        when (uiop:string-prefix-p sigil string)
133          do (return tag)))
134
135(defun parse-line (string &key verbatimp)
136  "Parse STRING as a LINE.
137If VERBATIMP is non-nil, parse STRING verbatim."
138  (let* ((tag (tag-of string))
139         (text (chomp (subseq string (length (car (rassoc tag *tags*)))))))
140    (cond
141      (verbatimp
142       (make-line :tag :verbatim :content text))
143      ((eq :link tag)
144       (let* ((%split-line (split "\\s" text))
145              (href (chomp (car %split-line)))
146              (text (if (cdr %split-line)
147                        (chomp (format nil "~{~A~^ ~}" (cdr %split-line)))
148                        href)))
149         (make-link :href href :content text)))
150      ((zerop (length text))
151       blank)
152      (t
153       (make-line :tag tag :content text)))))
154
155(defun parse-lines (&optional (input *standard-input*))
156  "Parse INPUT into a list of LINEs."
157  (loop for this = (read-line input nil)
158        with verbatimp = nil
159        until (null this)
160        when (eq :verbatim (tag-of this))
161          do (setf verbatimp (not verbatimp))
162        collect (parse-line this :verbatimp verbatimp)))
163
164(defun group-stanzas (lines)
165  "Group LINES into a list of STANZAs."
166  (flet ((%accumulate-stanzas (stanzas line)
167           (let ((stanza (car stanzas)))
168             (cond
169               ;; A naked line goes into a stanza by itself.
170               ((not stanza)
171                (list (list line) nil))
172               ;; Two links in a row -- start a new stanza.
173               ((and (tag-equal line :link)
174                     (tag-equal (car stanza) :link)
175                     (tag-null (cadr stanza)))
176                (list* (list line (car stanza))
177                       (make-stanza (nreverse (cdr stanza)))
178                       (cdr stanzas)))
179               ;; Same line type -- continue this stanza.
180               ((tag-equal (car stanza) (line-tag line))
181                (list* (cons line stanza)
182                       (cdr stanzas)))
183               ;; Links within paragraphs --- continue the stanza.
184               ((or (and (tag-equal line :link)
185                         (tag-null (car stanza))
186                         (not (blankp (car stanza))))
187                    (and (tag-null line)
188                         (tag-equal (car stanza) :link)
189                         (tag-null (cadr stanza))))
190                (list* (cons line stanza)
191                       (cdr stanzas)))
192               ;; Different tags --- start a new stanza.
193               (t
194                (list* (list line)
195                       (make-stanza (nreverse stanza))
196                       (cdr stanzas)))))))
197    (let ((stanzas (remove-if #'blankp
198                              (reduce #'%accumulate-stanzas lines
199                                      :initial-value ()))))
200      (remove-if (lambda (x) (or (null x) (blankp x)))
201                 (nreverse (cons (make-stanza (nreverse (car stanzas)))
202                                 (cdr stanzas)))))))
203
204(defun parse (source)
205  (make-document :content (group-stanzas (parse-lines source))))
206
207(defun parse-string (string)
208  (with-input-from-string (s string)
209    (parse s)))
210
211(defun parse-file (file)
212  (with-open-file (s file)
213    (parse s)))
214
215;;; Writing
216
217(defstruct writer
218  (name nil :type (or symbol string character))
219  ;; FORMATS, STRUCT-FILTERS, and TEXT-FILTERS are all lists of SPECs, where a
220  ;; SPEC is of the form (TAG LEVEL PAYLOAD).
221  (formats `((:line ,*default-line-format*)
222             (:stanza ,*default-stanza-format*)
223             (:document ,*default-document-format*)))
224  struct-filters
225  text-filters)
226
227(defparameter *current-writer* (make-writer))
228(defparameter *writers* (list *current-writer*))
229
230(defun current-writer ()
231  (or *current-writer* (make-writer)))
232
233(defmacro defwriter (name &key formats struct-filters text-filters)
234  `(let ((new-writer (make-writer :name ,(intern (string name) :keyword)
235                                  :formats ',formats
236                                  :struct-filters ',struct-filters
237                                  :text-filters ',text-filters)))
238     (pushnew new-writer
239              *writers*
240              :test #'equal)
241     new-writer))
242
243(defwriter gemtext
244  :formats ((:h3 (:line "~&### ~A"))
245            (:h2 (:line "~&## ~A"))
246            (:h1 (:line "~&# ~A"))
247            (:quote (:line "~&> ~A"))
248            (:list (:line "~&* ~A"))
249            (:link (:line "~&=> ~A ~A" href content))
250            (:verbatim (:line "~&~A"))
251            (:verbatim (:stanza  "~&```~A~%~{~A~%~}```~&" alt content)))
252  :struct-filters ((:stanza combine-like))
253  :text-filters ((:line unfold-text)))
254
255(defwriter html
256  :formats ((:stanza "~&<p>~{~A~^~%~}</p>")
257            (:verbatim
258             (:stanza "~&~A~{~A~^~%~}~A" open content close))
259            (:link
260             (:line "~&<li><a href=\"~A\">~A</a></li>" href content)
261             (:stanza "~&<ul>~%~{~A~%~}</ul>")
262             (:inline "<a href=\"~A\">~A</a> " href content))
263            (:list (:line "~&<li>~A</li>")
264                   (:stanza "~&<ul>~%~{~A~%~}</ul>"))
265            (:quote (:stanza "~&<blockquote>~%~{~A~%~}</blockquote>"))
266            (:h1 (:stanza "~&<h1>~{~A~^ ~}</h1>"))
267            (:h2 (:stanza "~&<h2>~{~A~^ ~}</h2>"))
268            (:h3 (:stanza "~&<h3>~{~A~^ ~}</h3>")))
269  :struct-filters ((:line add-inline-markup escape-html/pre)
270                   (:verbatim (:stanza verbatim-preproc))
271                   (:verbatim (:line identity))))
272
273(defun writer (x)
274  (etypecase x
275    (writer x)
276    ((or symbol string character)       ; string-designator
277     (or (find x *writers*
278               :key #'writer-name
279               :test #'string-equal)
280         (error "Writer `~A' unknown." x)))))
281
282(defun spec-match (formats tag level)
283  (if (levelp tag)
284      (alist-get tag formats)
285      (alist-get level (alist-get tag formats))))
286
287(defun levelp (x)
288  (find x '(:line :inline :stanza :document)))
289
290(defun format-object (writer object level &key content)
291  (let ((format-args (format-of writer
292                                level
293                                (ignore-errors
294                                 (slot-value object 'tag)))))
295    (when (null (car format-args))
296      (setf format-args (cdr format-args)))
297    (apply #'format nil (car format-args)
298           (mapcar (lambda (slot)
299                     (or content
300                         (ignore-errors
301                          (slot-value object slot))
302                         ""))
303                   (or (cdr format-args)
304                       '(content))))))
305
306(defun format-of (writer level &optional tag)
307  (let ((formats (writer-formats (writer writer))))
308    (or (spec-match formats tag level)
309        (case level
310          (:line
311           (or (spec-match formats nil :line)
312               (list *default-line-format*)))
313          (:inline
314           (or (spec-match formats tag :line)
315               (spec-match formats nil :inline)
316               (spec-match formats nil :line)
317               (list *default-line-format*)))
318          (:stanza
319           (or (spec-match formats nil :stanza)
320               (list *default-stanza-format*)))
321          (:document
322           (or (spec-match formats nil :document)
323               (list *default-document-format*)))))))
324
325(defun struct-filters-of (writer tag level)
326  (let ((filters (writer-struct-filters (writer writer))))
327    (apply #'compose #'identity
328           (or (spec-match filters tag level)
329               (spec-match filters nil level)))))
330
331(defun text-filters-of (writer tag level)
332  (let ((filters (writer-text-filters (writer writer))))
333    (apply #'compose #'identity
334           (or (spec-match filters tag level)
335               (spec-match filters nil level)))))
336
337(defun line-string (line &key inlinep (writer (current-writer)))
338  (let* ((tag (line-tag line))
339         (level (if inlinep :inline :line))
340         (line (funcall (struct-filters-of writer tag level)
341                        line)))
342    (funcall (text-filters-of writer tag level)
343             (format-object writer line level))))
344
345(defun stanza-string (stanza &key (writer (current-writer)))
346  (let* ((tag (stanza-tag stanza))
347         (level :stanza)
348         ;;(format-args (format-of writer tag level))
349         (stanza (funcall (struct-filters-of writer tag level)
350                          stanza))
351         (formatted-lines
352           (loop for line in (stanza-content stanza)
353                 collect
354                 (line-string line
355                              :inlinep (not (eq (line-tag line) tag))
356                              :writer writer))))
357    (funcall (text-filters-of writer tag level)
358             (format-object writer stanza level :content formatted-lines))))
359
360(defun document-string (document &key (writer (current-writer)))
361  (let* ((tag nil)
362         (level :document)
363         ;;(format-args (format-of writer tag level))
364         (formatted-stanzas
365           (loop for stanza in (document-content document)
366                 collect
367                 (stanza-string stanza :writer writer))))
368    (funcall (text-filters-of writer tag level)
369             (format-object writer document level :content formatted-stanzas))))
370
371;;; Filters
372
373(defun fold-text (string &key (width 72) (prefix 0))
374  ;; TODO: fold-text-preserving-gemini-links
375  "Fold (word-wrap) STRING to WIDTH."
376  (let ((words (split "\\s+" string))
377        (current-width 0))
378    (loop for word in words
379          for word-width = (length word)
380          if (>= (+ word-width current-width) width)
381            do (progn (format t "~&~,,v@a " prefix word)
382                      (setf current-width (+ word-width 1 prefix)))
383          else
384            do (progn (format t "~a " word)
385                      (incf current-width (1+ word-width))))))
386
387(defun unfold-text (string)
388  "Replace newlines in STRING with spaces."
389  (regex-replace-all "\\n" (chomp string) " "))
390
391(defun combine-like (stanza)
392  "Combine lines with similar tags in STANZA together."
393  (let ((combined
394          (reduce
395           (lambda (lines line)
396             (let ((this (car lines)))
397               (if (and this
398                        (not (link-p line))
399                        (eq (line-tag this)
400                            (line-tag line)))
401                   (cons (make-line :tag (line-tag line)
402                                    :content
403                                    (format nil "~A ~A"
404                                            (line-content this)
405                                            (line-content line)))
406                         (cdr lines))
407                   (cons line lines))))
408           (stanza-content stanza)
409           :initial-value ())))
410    (make-stanza (nreverse combined))))
411
412;;; XXX -- buggy as heck
413(defun character-wrap (string char open close)
414  "Convert runs of 'CHAR ... CHAR' in STRING to 'OPEN ... CLOSE'.
415CHAR must be on a word boundary.  Example:
416(character-wrap \"some *body* once told me\" #\* \"<b>\" \"<\/b>\") =>
417\"some <b>body</b> once told me\"."
418  (regex-replace-all `(:sequence
419                       (:register
420                        (:alternation
421                         :whitespace-char-class
422                         :start-anchor
423                         #\- #\[ #\( #\{ #\" #\' #\{{CONTENT}}lt;))
424                       ,char
425                       (:register
426                        (:sequence
427                         :non-whitespace-char-class
428                         (:non-greedy-repetition 0 nil :everything)
429                         :non-whitespace-char-class))
430                       ,char
431                       (:register
432                        (:alternation
433                         :whitespace-char-class
434                         :end-anchor
435                         #\, #\. #\? #\! #\; #\: #\- #\] #\) #\}
436                         #\" #\' #\{{CONTENT}}gt;)))
437                     string
438                     (format nil "\\1~A\\2~A\\3" open close)))
439
440
441(defun escape-html (text)
442  "Escape HTML entities in TEXT."
443  (ppcre:regex-replace-all
444   "[&<>]" text
445   (lambda (target-string start end match-start match-end reg-starts reg-ends)
446     (declare (ignore start end match-end reg-starts reg-ends))
447     (case (char target-string match-start)
448       (#\{{CONTENT}}lt; "&lt;")
449       (#\{{CONTENT}}gt; "&gt;")
450       (#\{{CONTENT}}amp; "&amp;")))))
451
452(defun escape-html/pre (line)
453  (if (link-p line)
454      (make-link :href (link-href line)
455                 :content (escape-html (link-content line)))
456      (make-line :tag (line-tag line)
457                 :content (escape-html (line-content line)))))
458
459(defun add-inline-markup (line)
460  "Convert *inline* =markup= to HTML in a LINE."
461  (let* ((text (line-content line))
462         (text (character-wrap text #\* "<b>" "</b>"))
463         (text (character-wrap text #\_ "<i>" "</i>"))
464         (text (character-wrap text #\= "<code>" "</code>")))
465    (if (link-p line)
466        (make-link :content text :href (link-href line))
467        (make-line :tag (line-tag line) :content text))))
468
469(defun verbatim-preproc (verb)
470  "Give VERB the correct OPEN and CLOSE tags."
471  (let ((alt (verbatim-alt verb))
472        (open "<pre><code>")
473        (close "</code></pre>")
474        (preproc '(escape-html/pre)))
475
476    (unless (zerop (length alt))
477      (case (char alt 0)
478        ((#\. #\#)                      ; .classes, #id
479         (multiple-value-bind (classes id)
480             (loop for x in (ppcre:all-matches-as-strings "[#.][^#.]+" alt)
481                   if (eq #\. (char x 0))
482                     collect (subseq x 1) into classes
483                   if (eq #\# (char x 0))
484                     collect (subseq x 1) into id
485                   finally (return (values classes id)))
486           (setf open (format nil "<pre~@?~@?><code>"
487                              (if classes " class=\"~{~A~^ ~}\"" "~*")
488                              classes
489                              (if id " id=\"~{~A~^ ~}\"" "~*")
490                              id))))
491
492        (#\{{CONTENT}}lt;                            ; custom HTML tag
493         (if (uiop:string-prefix-p "<>" alt)
494             ;; ```<> means interpret the verbatim block as raw HTML
495             (setf open ""
496                   close ""
497                   preproc nil)
498             (setf open alt
499                   close (regex-replace-all "<([^\\s>]+)([^>]+)?>"
500                                            open
501                                            "</\\1>"))))
502
503        (#\|                            ; pipe to an external process
504         (let* ((stanza-text
505                  (loop for ln in (verbatim-content verb)
506                        collect (line-content ln) into ss
507                        finally (return (format nil "~{~A~^~%~}" ss))))
508                (command-line
509                  (ppcre:split "\\s" (subseq alt 1)))
510                (piped-text
511                  (ignore-errors
512                   (with-input-from-string (s stanza-text)
513                     (uiop:run-program command-line :input s :output
514                                       :string)))))
515           ;; Here it's easier to just return a new stanza altogether
516           (when piped-text
517             (setf open (format nil "<!-- ~A -->~%" alt)
518                   close "")
519             (setf verb (%make-verbatim
520                         :tag :verbatim
521                         :content (with-input-from-string (s piped-text)
522                                    (parse-lines s)))))))))
523
524    (%make-verbatim
525     :tag :verbatim
526     :alt (verbatim-alt verb)
527     :open open
528     :close close
529     :content (mapcar (apply #'compose #'identity preproc)
530                      (verbatim-content verb)))))
531
532;;; Utilities
533
534(defun chomp (string)
535  "Trim whitespace off both ends of STRING."
536  (string-trim '(#\newline #\space #\tab) string))
537
538(defun compose (function &rest more-functions) ; yoinked from alexandria
539  "Returns a function composed of FUNCTION and MORE-FUNCTIONS that applies its
540arguments to to each in turn, starting from the rightmost of MORE-FUNCTIONS,
541and then calling the next one with the primary value of the last."
542  (reduce (lambda (f g)
543            (lambda (&rest arguments)
544              (declare (dynamic-extent arguments))
545              (funcall f (apply g arguments))))
546          more-functions
547          :initial-value function))
548
549(defun alist-get (x alist &rest rest)
550  (cdr (apply #'assoc x alist rest)))
551
552(defun alist-set (x alist new &rest rest)
553  (rplacd (apply #'assoc x alist rest) new))
554
555(defsetf alist-get alist-set)