1(in-package :cl-user)
2
3(defpackage #:net.acdw.clarence
4 (:nicknames #:clarence)
5 (:use :cl)
6 (:import-from :cl-ppcre
7 #:split
8 #:regex-replace-all)
9 (:export #:*tags*
10 #:line #:make-line #:copy-line #:line-p
11 #:line-tag #:line-content
12 #:link #:make-link #:copy-link #:link-p
13 #:link-tag #:link-content #:link-href
14 #:stanza #:make-stanza #:copy-stanza #:stanza-p
15 #:stanza-tag #:stanza-content
16 #:verbatim #:copy-verbatim #:verbatim-p
17 #:verbatim-tag #:verbatim-content
18 #:verbatim-open #:verbatim-close #:verbatim-alt
19 #:document #:make-document #:copy-document #:document-p
20 #:document-content
21 #:blank #:blankp
22 #:parse-line #:parse-lines #:parse #:parse-string #:parse-file
23 #:writer #:make-writer #:copy-writer #:writer-p
24 #:writer-name #:writer-line-format #:writer-inline-format
25 #:writer-stanza-format #:writer-document-format #:writer-tag-formats
26 #:writer-line-preproc #:writer-inline-preproc
27 #:writer-stanza-preproc #:writer-document-preproc
28 #:writer-tag-preprocs
29 #:writer-line-postproc #:writer-inline-postproc
30 #:writer-stanza-postproc #:writer-document-postproc
31 #:writer-tag-postprocs
32 #:defwriter #:*current-writer* #:*writers*
33 #:tag-of #:format-of #:preprocs-of #:postprocs-of
34 #:line-string #:stanza-string #:document-string
35 #:fold-text #:unfold-text #:combine-like #:character-wrap
36 #:gemtext
37 #:html #:escape-html #:escape-html/pre
38 #:add-inline-markup #:verbatim-preproc))
39
40(in-package :net.acdw.clarence)
41
42;;; Parameters
43
44(defparameter *tags*
45 '(("###" . :h3)
46 ("##" . :h2)
47 ("#" . :h1)
48 ("=>" . :link)
49 ("> " . :quote)
50 ("* " . :list)
51 ("```" . :verbatim))
52 "Alist of source sigil to internal tags.
53It's important that if tags start with the same characters, the longer tag is
54first.")
55
56;;; Gemtext types
57
58(defstruct line
59 tag content)
60
61(defstruct (stanza (:constructor %make-stanza))
62 tag content)
63
64(defstruct document
65 content)
66
67;; Derived types
68
69(defstruct (link (:include line)
70 (:constructor %make-link))
71 href)
72
73(defstruct (verbatim (:include stanza)
74 (:constructor %make-verbatim))
75 alt open close)
76
77;; Blank lines
78
79(defvar blank (make-line :tag :blank))
80
81(defun blankp (line)
82 "Is LINE blank?"
83 (and (line-p line)
84 (or (eq :blank (line-tag line))
85 (zerop (length (line-content line))))))
86
87;; Constructors
88
89(defun make-link (&key (content "") (href ""))
90 "Make an instance of LINK."
91 (%make-link :tag :link :content content :href href))
92
93(defun make-stanza (lines)
94 (let ((tag (line-tag (car lines))))
95 (if (eq :verbatim tag)
96 (%make-verbatim :tag :verbatim
97 :content ; the first and last lines are ```
98 (nreverse (cdr (nreverse (cdr lines))))
99 :alt (line-content (car lines)))
100 (%make-stanza :tag tag
101 :content lines))))
102
103;;; Reading
104
105(defun parse-line (input-string &key (verbatimp nil))
106 "Parse a LINE from INPUT-STRING.
107If VERBATIMP is non-nil, return a verbatim line."
108 (let* ((tag (tag-of input-string))
109 (text (chomp (subseq input-string (length (car (rassoc tag *tags*)))))))
110 (cond
111 (verbatimp
112 (make-line :tag :verbatim :content input-string))
113 ((eq :link tag)
114 (let* ((%split-line (split "\\s" text))
115 (href (chomp (car %split-line)))
116 (text (if (cdr %split-line)
117 (chomp (format nil "~{~A~^ ~}" (cdr %split-line)))
118 href)))
119 (make-link :href href :content text)))
120 ((zerop (length text))
121 (make-line :tag :blank))
122 (t
123 (make-line :tag tag :content text)))))
124
125(defun parse-lines (&optional (input *standard-input*))
126 "Parse a list of LINEs from INPUT."
127 (loop for this = (read-line input nil)
128 with verbatimp = nil
129 until (null this)
130 collect (parse-line this :verbatimp verbatimp)
131 when (eq :verbatim (tag-of this))
132 do (setf verbatimp (not verbatimp))))
133
134(defun group-stanzas (lines)
135 "Group LINES into a list of STANZAs."
136 (flet ((%accumulate-stanzas (stanzas line)
137 (let ((stanza (car stanzas)))
138 (cond
139 ;; A naked line goes into a stanza by itself.
140 ((not stanza)
141 (list (list line) nil))
142 ;; Two links in a row -- start a new stanza.
143 ((and (eq :link (line-tag line))
144 (eq :link (line-tag (car stanza)))
145 (cadr stanza)
146 (null (line-tag (cadr stanza))))
147 (list* (list line (car stanza))
148 (make-stanza (nreverse (cdr stanza)))
149 (cdr stanzas)))
150 ;; Same line type -- continue this stanza.
151 ((eq (line-tag line)
152 (and (car stanza) (line-tag (car stanza))))
153 (list* (cons line stanza)
154 (cdr stanzas)))
155 ;; Links within paragraphs --- continue the stanza.
156 ((or (and (eq :link (line-tag line))
157 (null (line-tag (car stanza)))
158 (not (blankp (car stanza))))
159 (and (null (line-tag line))
160 (eq :link (line-tag (car stanza)))
161 (cadr stanza)
162 (null (line-tag (cadr stanza)))))
163 (list* (cons line stanza)
164 (cdr stanzas)))
165 ;; Different tags --- start a new stanza.
166 (t
167 (list* (list line)
168 (make-stanza (nreverse stanza))
169 (cdr stanzas)))))))
170 (let ((stanzas (remove-if (lambda (st)
171 (and (stanza-p st)
172 (eq :blank (stanza-tag st))))
173 (reduce #'%accumulate-stanzas lines
174 :initial-value ()))))
175 (remove-if #'null
176 (nreverse (cons (make-stanza (nreverse (car stanzas)))
177 (cdr stanzas)))))))
178
179(defun parse (source)
180 (make-document :content (group-stanzas (parse-lines source))))
181
182(defun parse-string (string)
183 (with-input-from-string (s string)
184 (parse s)))
185
186(defun parse-file (file)
187 (with-open-file (s file)
188 (parse s)))
189
190;;; Writing
191
192;; Type
193
194(defstruct writer
195 "A WRITER holds information needed to write a document.
196
197*-FORMAT slots hold format specifiers for elements. A bare string is taken as a
198format specifier to be applied to that elements' CONTENT; a list is expected to
199have a CAR that's the specifier and a CDR full of slot names for the given
200object. T is substituted with the object's formatted content. These slots are
201for plain (nil-tagged) text.
202
203TAG-FORMATS hold differnet formats for each tag. This slot is an
204alist where each element is (TAG . PLIST). PLIST keys are either :LINE,
205:INLINE, or :STANZA, and the values are as in *-FORMAT slots.
206
207The PREPROC and POSTPROC slots of a WRITER are similar to FORMAT slots, except
208they hold preprocessing and postprocessing functions, respectively.
209Preprocessors are applied to the raw object before formatting it; postprocessors
210are applied to the resulting, formatted text."
211 name
212 ;; FORMATs control how element text is emitted. STANZA and DOCUMENT formats
213 ;; should handle lists.
214 (line-format "~&~A")
215 inline-format
216 (stanza-format "~&~{~A~^~&~}~%~%")
217 document-format
218 tag-formats
219 ;; PREPROCs are lists of functions that operate, in order, on the raw data
220 ;; structure before it's formatted.
221 line-preproc
222 inline-preproc
223 stanza-preproc
224 document-preproc
225 tag-preprocs
226 ;; POSTPROCs are lists of functions that operate, in order, on the formatted
227 ;; text of the given structure.
228 line-postproc
229 inline-postproc
230 stanza-postproc
231 document-postproc
232 tag-postprocs)
233
234(defmacro defwriter (name &rest make-writer-args)
235 (let ((name (intern (format nil "*~A-WRITER*" name)))
236 (keyword (intern (format nil "~A" name) :keyword)))
237 `(progn (defvar ,name
238 (make-writer :name ,keyword ,@make-writer-args))
239 (pushnew ,name *writers*))))
240
241(defun writer (x)
242 "Ensure X is a writer.
243If X is a writer, return it. If it's a string-designator, find the writer named
244X in *WRITERs*. If it doesn't exist or is another type, error."
245 (etypecase x
246 (writer x)
247 ((or symbol string character) ; string-designator
248 (or (find x *writers*
249 :key #'writer-name
250 :test #'string-equal)))))
251
252;; Parameters
253
254(defparameter *current-writer* (make-writer)
255 "The WRITER that's currently being used.")
256
257(defparameter *writers* (list *current-writer*)
258 "A list of known WRITERs.")
259
260;; Queries
261
262(defun tag-of (input)
263 "Return the tag of INPUT line."
264 (loop for (sigil . tag) in *tags*
265 when (uiop:string-prefix-p sigil input)
266 do (return tag)))
267
268(defun format-of (level tag writer)
269 "Get the format of TAG at LEVEL in WRITER.
270If a given format doesn't exist, it falls through in this order:
2711. <tag>-line -> line
2722. <tag>-stanza -> stanza
2733. <tag>-inline -> <tag>-line -> inline -> line
2744. document -> stanza"
275 (let* ((formats (writer-tag-formats writer))
276 (tag-formats (cdr (assoc tag formats)))
277 (target (getf tag-formats level)))
278 (uiop:ensure-list
279 (if target
280 target
281 (case level
282 (:line ; 1.
283 (writer-line-format writer))
284 (:stanza ; 2. -- or ts -> s -> tl -> l ?
285 (or (getf tag-formats :stanza)
286 (writer-stanza-format writer)))
287 (:inline ; 3.
288 (or (getf tag-formats :line)
289 (writer-inline-format writer)
290 (writer-line-format writer)))
291 (:document ; 4.
292 (or (writer-document-format writer)
293 (writer-stanza-format writer))))))))
294
295(defun preprocs-of (level tag writer)
296 "Return a function composing all the preprocessors of TAG at LEVEL."
297 (let ((fns (uiop:ensure-list
298 (or (getf (cdr (assoc tag (writer-tag-preprocs writer)))
299 level)
300 (slot-value writer
301 (ecase level
302 (:line 'line-preproc)
303 (:inline 'inline-preproc)
304 (:stanza 'stanza-preproc)
305 (:document 'document-preproc)))))))
306 (apply #'compose #'identity fns)))
307
308(defun postprocs-of (level tag writer)
309 "Return a function composing all the postprocessors of TAG at LEVEL."
310 (let ((fns (uiop:ensure-list
311 (or (getf (cdr (assoc tag (writer-tag-postprocs writer)))
312 level)
313 (slot-value writer
314 (ecase level
315 (:line 'line-postproc)
316 (:inline 'inline-postproc)
317 (:stanza 'stanza-postproc)
318 (:document 'document-postproc)))))))
319 (apply #'compose #'identity fns)))
320
321;; Output strings
322
323(defun line-string (line &key inlinep (writer (or *current-writer*
324 (make-writer))))
325 "Return a string of LINE's written representation.
326If INLINEP is non-nil, format according to :inline, else :line."
327 (let* ((writer (writer writer))
328 (level (if inlinep :inline :line))
329 (format-args (format-of level (line-tag line) writer))
330 (line (funcall (preprocs-of level (line-tag line) writer)
331 line)))
332 (funcall (postprocs-of level (line-tag line) writer)
333 (apply #'format nil (car format-args)
334 (mapcar (lambda (slot)
335 (if (eq slot t)
336 (line-content line)
337 (slot-value line slot)))
338 (or (cdr format-args) '(t)))))))
339
340(defun stanza-string (stanza &key (writer (or *current-writer*
341 (make-writer))))
342 "Return a string of STANZA's written representation."
343 (let* ((writer (writer writer))
344 (stanza (funcall (preprocs-of :stanza (stanza-tag stanza) writer)
345 stanza))
346 (format-args (format-of :stanza (stanza-tag stanza) writer))
347 (formatted-lines
348 (loop for line in (stanza-content stanza)
349 collect
350 (line-string line
351 :inlinep (not (eq (line-tag line)
352 (stanza-tag stanza)))
353 :writer writer))))
354 (funcall (postprocs-of :stanza (stanza-tag stanza) writer)
355 (apply #'format nil (car format-args)
356 (mapcar (lambda (slot)
357 (if (eq slot t)
358 formatted-lines
359 (slot-value stanza slot)))
360 (or (cdr format-args) '(t)))))))
361
362(defun document-string (document &key (writer (or *current-writer*
363 (make-writer))))
364 "Return a string of DOCUMENT's written representation."
365 (let* ((writer (writer writer))
366 (document (funcall (preprocs-of :document nil writer)
367 document))
368 (format-args (format-of :document nil writer))
369 (formatted-stanzas
370 (loop for stanza in (document-content document)
371 collect (stanza-string stanza :writer writer))))
372 (funcall (postprocs-of :document nil writer)
373 (apply #'format nil (car format-args)
374 (mapcar (lambda (slot)
375 (if (eq slot t)
376 formatted-stanzas
377 (slot-value document slot)))
378 (or (cdr format-args) '(t)))))))
379
380;;; Processors
381
382(defun fold-text (string &key (width 72) (prefix 0))
383 ;; TODO: fold-text-preserving-gemini-links
384 "Fold (word-wrap) STRING to WIDTH."
385 (let ((words (split "\\s+" string))
386 (current-width 0))
387 (loop for word in words
388 for word-width = (length word)
389 if (>= (+ word-width current-width) width)
390 do (progn (format t "~&~,,v@a " prefix word)
391 (setf current-width (+ word-width 1 prefix)))
392 else
393 do (progn (format t "~a " word)
394 (incf current-width (1+ word-width))))))
395
396(defun unfold-text (string)
397 "Replace newlines in STRING with spaces."
398 (regex-replace-all "\\n" (chomp string) " "))
399
400(defun combine-like (stanza)
401 "Combine lines with similar tags in STANZA together."
402 (let ((combined
403 (reduce
404 (lambda (lines line)
405 (let ((this (car lines)))
406 (if (and this
407 (not (link-p line))
408 (eq (line-tag this)
409 (line-tag line)))
410 (cons (make-line :tag (line-tag line)
411 :content
412 (format nil "~A ~A"
413 (line-content this)
414 (line-content line)))
415 (cdr lines))
416 (cons line lines))))
417 (stanza-content stanza)
418 :initial-value ())))
419 (make-stanza (nreverse combined))))
420
421(defun character-wrap (string char open close)
422 "Convert runs of 'CHAR ... CHAR' in STRING to 'OPEN ... CLOSE'.
423CHAR must be on a word boundary. Example:
424(character-wrap \"some *body* once told me\" #\* \"<b>\" \"<\/b>\") =>
425\"some <b>body</b> once told me\"."
426 (ppcre:regex-replace-all `(:sequence
427 (:register
428 (:alternation
429 :whitespace-char-class
430 :start-anchor
431 #\- #\[ #\( #\{ #\" #\' #\{{CONTENT}}lt;))
432 ,char
433 (:register
434 (:sequence
435 :non-whitespace-char-class
436 (:non-greedy-repetition 0 nil :everything)
437 :non-whitespace-char-class))
438 ,char
439 (:register
440 (:alternation
441 :whitespace-char-class
442 :end-anchor
443 #\, #\. #\? #\! #\; #\: #\- #\] #\) #\}
444 #\" #\' #\{{CONTENT}}gt;)))
445 string
446 (format nil "\\1~A\\2~A\\3" open close)))
447
448;;; Predefined writers
449
450(defwriter gemtext
451 :tag-formats '((:h3
452 :line "~&### ~A")
453 (:h2
454 :line "~&## ~A")
455 (:h1
456 :line "~&# ~A")
457 (:quote
458 :line "~&> ~A")
459 (:list
460 :line "~&* ~A")
461 (:link
462 :line ("~&=> ~A ~A" href t))
463 (:verbatim
464 :line "~&~A"
465 :stanza ("~&```~A~%~{~A~%~}```~&"
466 alt t)))
467 :stanza-preproc '(combine-like)
468 :line-postproc '(unfold-text))
469
470(defwriter html
471 :stanza-format "~&<p>~%~{~A~%~}</p>"
472 :document-format "~{~A~%~}"
473 :tag-formats '((:verbatim
474 ;; See VERBATIM-PREPROC
475 :stanza ("~&~A~{~A~^~%~}~A" open t close))
476 (:link
477 :line ("~&<li><a href=\"~A\">~A</a></li>" href t)
478 :stanza "~&<ul>~%~{~A~%~}</ul>"
479 :inline ("<a href=\"~A\">~A</a> " href t))
480 (:list
481 :line "~&<li>~A</li>"
482 :stanza "~&<ul>~%~{~A~%~}</ul>")
483 (:quote
484 :stanza "~&<blockquote>~%~{~A~%~}</blockquote>")
485 (:h1
486 :stanza "~&<h1>~{~A~^ ~}</h1>")
487 (:h2
488 :stanza "~&<h2>~{~A~^ ~}</h2>")
489 (:h3
490 :stanza "~&<h3>~{~A~^ ~}</h3>"))
491 :line-preproc '(add-inline-markup
492 escape-html/pre)
493 :tag-preprocs '((:verbatim
494 :stanza (verbatim-preproc))))
495
496(defun escape-html (text)
497 "Escape HTML entities in TEXT."
498 (ppcre:regex-replace-all
499 "[&<>]" text
500 (lambda (target-string start end match-start match-end reg-starts reg-ends)
501 (declare (ignore start end match-end reg-starts reg-ends))
502 (case (char target-string match-start)
503 (#\{{CONTENT}}lt; "<")
504 (#\{{CONTENT}}gt; ">")
505 (#\{{CONTENT}}amp; "&")))))
506
507(defun escape-html/pre (line)
508 (if (link-p line)
509 (make-link :href (link-href line)
510 :content (escape-html (link-content line)))
511 (make-line :tag (line-tag line)
512 :content (escape-html (line-content line)))))
513
514(defun add-inline-markup (line)
515 "Convert *inline* =markup= to HTML in a LINE."
516 (let* ((text (line-content line))
517 (text (character-wrap text #\* "<b>" "</b>"))
518 (text (character-wrap text #\_ "<i>" "</i>"))
519 (text (character-wrap text #\= "<code>" "</code>")))
520 (if (link-p line)
521 (make-link :content text :href (link-href line))
522 (make-line :tag (line-tag line) :content text))))
523
524(defun verbatim-preproc (verb)
525 "Give VERB the correct OPEN and CLOSE tags."
526 (let ((alt (verbatim-alt verb))
527 (open "<pre><code>")
528 (close "</code></pre>")
529 (preproc '(escape-html/pre)))
530
531 (unless (zerop (length alt))
532 (case (char alt 0)
533 ((#\. #\#) ; .classes, #id
534 (multiple-value-bind (classes id)
535 (loop for x in (ppcre:all-matches-as-strings "[#.][^#.]+" alt)
536 if (eq #\. (char x 0))
537 collect (subseq x 1) into classes
538 if (eq #\# (char x 0))
539 collect (subseq x 1) into id
540 finally (return (values classes id)))
541 (setf open (format nil "<pre~@?~@?><code>"
542 (if classes " class=\"~{~A~^ ~}\"" "~*")
543 classes
544 (if id " id=\"~{~A~^ ~}\"" "~*")
545 id))))
546
547 (#\{{CONTENT}}lt; ; custom HTML tag
548 (if (uiop:string-prefix-p "<>" alt)
549 ;; ```<> means interpret the verbatim block as raw HTML
550 (setf open ""
551 close ""
552 preproc nil)
553 (setf open alt
554 close (ppcre:regex-replace-all "<(\\S+)[^>]+>" open "</\\1>"))))
555
556 (#\| ; pipe to an external process
557 (let* ((stanza-text
558 (loop for ln in (verbatim-content verb)
559 collect (line-content ln) into ss
560 finally (return (format nil "~{~A~^~%~}" ss))))
561 (command-line
562 (ppcre:split "\\s" (subseq alt 1)))
563 (piped-text
564 (ignore-errors
565 (with-input-from-string (s stanza-text)
566 (uiop:run-program command-line :input s :output :string)))))
567 ;; Here it's easier to just return a new stanza altogether
568 (when piped-text
569 (setf open (format nil "<!-- ~A -->~%" alt)
570 close "")
571 (setf verb (%make-verbatim
572 :tag :verbatim
573 :content (with-input-from-string (s piped-text)
574 (parse-lines s)))))))
575 (t
576 )))
577
578 (%make-verbatim
579 :tag :verbatim
580 :alt (verbatim-alt verb)
581 :open open
582 :close close
583 :content (mapcar (apply #'compose #'identity preproc)
584 (verbatim-content verb)))))
585
586;;; Utilities
587
588(defun chomp (string)
589 "Trim whitespace off both ends of STRING."
590 (string-trim '(#\newline #\space #\tab) string))
591
592(defun compose (function &rest more-functions) ; yoinked from alexandria
593 "Returns a function composed of FUNCTION and MORE-FUNCTIONS that applies its
594arguments to to each in turn, starting from the rightmost of MORE-FUNCTIONS,
595and then calling the next one with the primary value of the last."
596 (reduce (lambda (f g)
597 (lambda (&rest arguments)
598 (declare (dynamic-extent arguments))
599 (funcall f (apply g arguments))))
600 more-functions
601 :initial-value function))