; defdoc -> defxdoc converter

(in-package "XDOC")
(include-book "import-acl2doc")
(include-book "std/util/defconsts" :dir :system)
(include-book "str/top" :dir :system)
(include-book "std/osets/sort" :dir :system)
(include-book "xdoc/parse-xml" :dir :system)
(include-book "misc/assert" :dir :system)
(set-state-ok t)
(program)


; Note: quick and dirty, ugly ill-documented code.
;
; I thought this was going to be really easy: i'd just use the existing ACL2
; importer to turn the strings into XML, then dump them into an output file.
; That took 10 minutes to write, but the result was rather ugly.
;
;  - there was no sensible word wrapping
;  - there was excessive escaping since no @({...}) or @('...') were used
;  - every link was of the <see topic='@(url foo)'>foo</see> variety, which
;    was really long
;  - the <code> blocks weren't indented, so emacs color highlighting was
;    completely wrong.
;  - the defxdoc forms were indented weirdly, e.g.,
;
;     (defxdoc
;      acl2::|A Sketch of How the Rewriter Works|
;      :parents (acl2::|Pages Written Especially for the Tours|)
;      :short "A Sketch of How the Rewriter Works"
;      :long
;      "<p></p>
;      ...")
;
; I started to fix some of these with some elisp macros.  But for the
; preprocessor stuff, it seems like fixing it in ACL2 is easier.  My basic
; approach was:
;
;  - Change the way XDOC stuff is generated so it's pure XML
;  - Parse in these XML tokens with XDOC's primitive XML parser
;  - Write a fixer-upper that replaces, e.g., <code> with @({...}), <tt> with
;    @('...'), and fixes <see ...> links in the obvious case.
;  - Fix up the XML tokens, creating new, mixed XML/preprocessor text.

(defun next-open-or-close-tag (tokens)
  ;; returns (mv prefix suffix), car of suffix is thef first open/close tag.
  (b* (((when (atom tokens))
        (mv nil nil))
       ((when (or (opentok-p (car tokens))
                  (closetok-p (car tokens))))
        (mv nil tokens))
       ((mv prefix suffix)
        (next-open-or-close-tag (cdr tokens))))
    (mv (cons (car tokens) prefix) suffix)))

(defun tokens-to-plaintext-aux (tokens acc)
  ;; acc is a character list in reverse order.
  (b* (((when (atom tokens))
        acc)
       (acc (cond ((texttok-p (car tokens))
                   (str::revappend-chars (texttok-text (car tokens)) acc))
                  ((entitytok-p (car tokens))
                   (str::revappend-chars (entitytok-as-plaintext (car tokens)) acc))
                  (t
                   (progn$
                    (cw "*** TOKENS-TO-PLAINTEXT: Expected only text/entity tokens!")
                    acc)))))
    (tokens-to-plaintext-aux (cdr tokens) acc)))

(defun tokens-to-plaintext (tokens)
  (b* ((acc (tokens-to-plaintext-aux tokens nil))
       (str (str::rchars-to-string acc)))
    ;; Ordinary entities have already been converted.  however, acl2's exporter
    ;; also converts @ into @@.  So for use in @({...}) and @('...') sections, we
    ;; need to convert it back.
    (str::strsubst "@@" "@" str)))

(defun write-attrval (x acc)
  ;; see read-attrval.  x is some attribute value.  if it has any quotes we
  ;; need to escape them.  we'll prefer single quotes because they're nicer
  ;; within lisp text.
  (cond ((not (str::substrp "'" x))
         ;; no single quotes in the value, so it's okay to use 'value'
         (cons #\' (str::revappend-chars x (cons #\' acc))))
        ((not (str::substrp "\"" x))
         ;; no double quotes in value, so it's okay to use "value"
         (cons #\" (str::revappend-chars x (cons #\" acc))))
        (t
         (er hard? 'write-attrval-aux
             "Attribute value has quotes in it."))))

(defun write-atts (x acc)
  ;; atts is an alist of name . value
  (b* (((when (atom x))
        acc)
       ((cons name value) (car x))
       (acc (cons #\Space acc))
       (acc (str::revappend-chars name acc))
       (acc (cons #\= acc))
       (acc (write-attrval value acc)))
    (write-atts (cdr x) acc)))

(defun write-opentok (x acc)
  (b* ((name (opentok-name x))
       (atts (opentok-atts x))
       (acc (str::revappend-chars "<" acc))
       (acc (str::revappend-chars name acc))
       (acc (write-atts atts acc))
       (acc (str::revappend-chars ">" acc)))
    acc))

(defun write-closetok (x acc)
  (b* ((name (closetok-name x))
       (acc (str::revappend-chars "</" acc))
       (acc (str::revappend-chars name acc))
       (acc (str::revappend-chars ">" acc))
       ;;(acc (if (member-equal (closetok-name (car tokens)) '("p" "blockquote"))
       ;;         (list* #\Newline #\Newline acc)
       ;;       acc))
       )
    acc))

(defun pure-upper-case-p (x)
  (and (str::string-has-some-up-alpha-p x 0 (length x))
       (not (str::string-has-some-down-alpha-p x 0 (length x)))))

(defun fixup (tokens acc state)
  ;; acc is a character list in reverse order.
  (b* (((when (atom tokens))
        (mv acc state))

       ((when (texttok-p (car tokens)))
        (fixup (cdr tokens)
               (str::revappend-chars (texttok-text (car tokens)) acc)
               state))

       ((when (entitytok-p (car tokens)))
        (fixup (cdr tokens)
               (str::revappend-chars (entitytok-as-entity (car tokens)) acc)
               state))

       ((when (closetok-p (car tokens)))
        (fixup (cdr tokens)
               (write-closetok (car tokens) acc)
               state))

       ((when (opentok-p (car tokens)))
        (b* ((name (opentok-name (car tokens)))
             (atts (opentok-atts (car tokens))))

          (cond ((equal name "code")
                 ;; Convert <code>...</code> fragments into @({...}) when possible,
                 ;; since it's (1) more readable and (2) helps &amp; stuff go away.
                 (b* (((mv prefix suffix) (next-open-or-close-tag (cdr tokens)))

                      ((unless (and (consp suffix)
                                    (closetok-p (car suffix))
                                    (equal (closetok-name (car suffix)) "code")))
                       ;; Code tag that is never closed or has embedded markup.
                       ;; Embedded markup can occasionally happen, e.g., it is
                       ;; used in 'acl2::|A Flying Tour of ACL2| as a horrble
                       ;; structuring device.  It's sort of a miracle that it
                       ;; works at all.  We can't turn these into preprocessor
                       ;; stuff, so leave them as code tags.
                       (cw "Warning: <code> block too fancy for preprocessor!~%")
                       (fixup (cdr tokens)
                              (write-opentok (car tokens) acc)
                              state))

                      (guts (tokens-to-plaintext prefix))
                      ((when (str::substrp "})" guts))
                       ;; Code tag with }) occurring somewhere inside it.  Rare,
                       ;; but can occasionally happen with abstract stobj stuff,
                       ;; e.g., ":preserved update-misc{preserved})".  So, we can't
                       ;; safely turn these into preprocessor things.  Leave them
                       ;; as ordinary code tags.
                       (cw "Warning: <code> block too fancy for preprocessor!~%")
                       (fixup (cdr tokens)
                              (write-opentok (car tokens) acc)
                              state))

                      ;; Indent every code line one space, so that s-exprssions
                      ;; will not start on column zero and screw up emacs
                      ;; syntax highlighting.
                      (guts (str::trim-bag guts '(#\Newline)))
                      (indented-guts (str::prefix-lines guts " "))

                      ;; Else, fine to use the preprocessor instead.
                      (acc (str::revappend-chars "@({" acc))
                      (acc (cons #\Newline acc))
                      ;(acc (cons #\Space acc)) ;; due to trim
                      ;(acc (str::revappend-chars (str::trim indented-guts) acc))
                      ;(acc (cons #\Newline acc)) ;; due to trim
                      (acc (str::revappend-chars indented-guts acc))
                      (acc (cons #\Newline acc))
                      (acc (str::revappend-chars "})" acc)))
                   (fixup (cdr suffix) acc state)))

                ((equal name "tt")
                 (b* (((mv prefix suffix) (next-open-or-close-tag (cdr tokens)))

                      ((unless (and (consp suffix)
                                    (closetok-p (car suffix))
                                    (equal (closetok-name (car suffix)) "tt")))
                       ;; tt tag with embedded markup?  don't convert it...
                       (fixup (cdr tokens)
                              (write-opentok (car tokens) acc)
                              state))

                      (guts (tokens-to-plaintext prefix))
                      ((when (str::substrp "')" guts))
                       ;; has ') somewhere inside it, can't convert it.
                       ;; several occurrences of this due to things like
                       ;; <tt>(equal x x')</tt>.
                       (fixup (cdr tokens)
                              (write-opentok (car tokens) acc)
                              state))

                      ;; else, fine to use the preprocessor.
                      (acc (str::revappend-chars "@('" acc))
                      (acc (str::revappend-chars guts acc))
                      (acc (str::revappend-chars "')" acc)))
                   (fixup (cdr suffix) acc state)))

                ((equal name "see")
                 (b* (((mv prefix suffix) (next-open-or-close-tag (cdr tokens)))

                      ((unless (and (consp suffix)
                                    (closetok-p (car suffix))
                                    (equal (closetok-name (car suffix)) "see")))
                       ;; see tag with embedded markup?  something fancy, don't
                       ;; convert it
                       (fixup (cdr tokens)
                              (write-opentok (car tokens) acc)
                              state))

                      (topic (cdr (assoc-equal "topic" atts)))
                      (codep (assoc-equal "use-tsee" atts))

                      ((unless topic)
                       (cw "*** Fubar atts for <see> tag: ~x0.~%" atts)
                       (fixup (cdr tokens)
                              (write-opentok (car tokens) acc)
                              state))

                      ((unless (and (str::strprefixp "@(url " topic)
                                    (str::strsuffixp ")" topic)))
                       (cw "*** Fubar topic for <see> tag: ~x0.~%" topic)
                       (fixup (cdr tokens)
                              (write-opentok (car tokens) acc)
                              state))

                      (link-target (subseq topic (length "@(url ") (- (length topic) 1)))
                      (link-text   (tokens-to-plaintext prefix))
                      ((unless (str::istreqv link-target link-text))
                       ;; fancy link, leave it alone
                       (or (not codep)
                           (cw "*** Fubar: fancy code link?~%~
                                   Tag: ~x0~%
                                   Text: ~x1~%~%"
                               (car tokens)
                               link-text))
                       (fixup (cdr tokens)
                              (write-opentok (car tokens) acc)
                              state))

                      ;; simple link, turn it into @(see foo).  safety check to
                      ;; try to avoid package problems...
                      ((mv error ?target-sym &)
                       (parse-symbol link-target 0 (length link-target)
                                     'acl2::rewrite
                                     (known-package-alist state)
                                     t))
                      ((when error)
                       (cw "*** fubar link symbol: ~x0.~%" topic)
                       (fixup (cdr tokens)
                              (write-opentok (car tokens) acc)
                              state))

; Originally this is what we were doing.  This was kind of misguided.  We were
; using the link target (a symbol, so typically in all upper case) and then
; trying to downcase it.  That's better than leaving it in all upper case.  But
; better yet, let's just use the actual link text, since that'll preserve the
; capitalization of things like ~ilc[Guard].  This might not be completely safe
; for symbols that need bar escaping or that sort of thing.  But maybe it will
; work in practice.

                      ((mv new-link-text state)
                       (fmt-to-str target-sym 'acl2::rewrite state))

                      (new-link-text
                       (if (str::istreqv (symbol-name target-sym) link-text)
                           ;; seems safe enough
                           link-text
                         (prog2$
                          (cw "*** hard symbol to link nicely: ~x0.~%" topic)
                          new-link-text)))

                      (- (or (not (pure-upper-case-p new-link-text))
                             (cw "Capitalized link may get downcased: ~x0.~%"
                                 new-link-text)))
                      (acc (if codep
                               (str::revappend-chars "@(tsee " acc)
                             (str::revappend-chars "@(see " acc)))
                      ;; (acc (str::revappend-chars new-link-text acc))
                      (acc (str::revappend-chars new-link-text acc))

                      (acc (str::revappend-chars ")" acc)))
                   (fixup (cdr suffix) acc state)))

                (t
                 (fixup (cdr tokens)
                        (write-opentok (car tokens) acc)
                        state))))))
    (cw "*** FIXUP: bad token ~x0.~%")
    (mv acc state)))

(defun fixup-str (tokens state)
  (b* (((mv acc state) (fixup tokens nil state)))
    (mv (str::rchars-to-string acc) state)))

(defun fixup-topic (topic state)
  (b* ((name    (cdr (assoc :name topic)))
       (parents (cdr (assoc :parents topic)))
       (short   (str::trim (cdr (assoc :short topic))))
       (long    (str::trim (cdr (assoc :long topic))))
       (- (cw "Processing ~x0.~%" name))
       ((mv err1 short-tokens) (parse-xml short))
       ((when err1)
        (cw "*** In topic ~x0, error in :short:~%" name)
        (let ((state (princ$ err1 *standard-co* state)))
          (mv nil state)))
       ((mv err2 long-tokens)  (parse-xml long))
       ((when err2)
        (cw "*** In topic ~x0, error in :long:~%" name)
        (let ((state (princ$ err2 *standard-co* state)))
          (mv nil state)))
       ((mv short-fixed state) (fixup-str short-tokens state))
       ((mv long-fixed state)  (fixup-str long-tokens state)))
    (mv `((:name    . ,name)
          (:parents . ,parents)
          (:short   . ,short-fixed)
          (:long    . ,long-fixed))
        state)))

(defun fixup-topics (topics state)
  (b* (((when (atom topics))
        (mv nil state))
       ((mv topic1 state) (fixup-topic (car topics) state))
       ((mv rest state)   (fixup-topics (cdr topics) state)))
    (mv (cons topic1 rest) state)))



; sorting topics in the file... kind of a crapshoot.  maybe a mostly alphabetical
; order is at least somewhat reasonable?

(defun acl2-native-symbolp (x)
  (equal (intern$ (symbol-name x) "ACL2") x))

(assert! (acl2-native-symbolp 'common-lisp::append))
(assert! (acl2-native-symbolp 'acl2::rewrite))
(assert! (not (acl2-native-symbolp 'xdoc::rewrite)))

(defun prepare-for-sort (topics)
  (if (atom topics)
      nil
    (cons (cons (if (acl2-native-symbolp (cdr (assoc :name (car topics)))) 0 1)
                (car topics))
          (prepare-for-sort (cdr topics)))))

(defun my-sort (topics)
  (strip-cdrs (sets::mergesort (prepare-for-sort topics))))

(acl2::defconsts (*fixed-topics* state)
  (mv-let
   (fixed state)
   (fixup-topics
    (cons '((:name    . acl2::acl2)
            (:parents . (acl2::top))
            (:short   . "ACL2 documentation (system only, not including the community books)")
            (:long    . "<p>This is the ACL2 documentation.  For a manual that includes both the ACL2 documentation and the ACL2 community books, see <a href='http://fv.centtech.com/acl2/latest/doc/'>http://fv.centtech.com/acl2/latest/doc/</a>.</p>"))
          xdoc::*acl2-ground-zero-topics*)
    state)
   (mv (my-sort fixed) state)))

; I started out just using fms! to print things, which was easy but not very
; good.  It indented the forms weirdly and printed symbol names unnecessarily.
; So I use a custom function to print these now.

(defun write-parents-aux (x acc state)
  (b* (((when (atom x))
        (mv acc state))
       ((mv str1 state) (fmt-to-str (car x) 'acl2::rewrite state))
       (acc (str::revappend-chars str1 acc))
       ((when (atom (cdr x)))
        (mv acc state)))
    (write-parents-aux (cdr x) (cons #\Space acc) state)))

(defun write-topic (x acc state)
  (b* ((name    (cdr (assoc :name x)))
       (parents (cdr (assoc :parents x)))
       (short   (str::trim (cdr (assoc :short x))))
       (long    (str::trim (cdr (assoc :long x))))
  
       (acc     (str::revappend-chars "(defxdoc " acc))

       ;; use fmt-to-string to deal with all lisp-encoding stuff
       ((mv name-str  state) (fmt-to-str name  'acl2::rewrite state))
       ((mv short-str state) (fmt-to-str short 'acl2::rewrite state))
       ((mv long-str  state) (fmt-to-str long  'acl2::rewrite state))

       (acc     (str::revappend-chars name-str acc))
       (acc     (cons #\Newline acc))
       (acc     (str::revappend-chars "  :parents (" acc))
       ((mv acc state) (write-parents-aux parents acc state))
       (acc     (str::revappend-chars ")" acc))
       (acc     (cons #\Newline acc))
       (acc     (str::revappend-chars "  :short " acc))
       (acc     (str::revappend-chars (str::trim short-str) acc))
       (acc     (cons #\Newline acc))
       (acc     (str::revappend-chars "  :long " acc))
       ;; Put one space in front of everything.
       (acc     (str::revappend-chars (str::prefix-lines (str::trim long-str)
                                                         " ")
                                      acc))
       (acc     (cons #\) acc))
       (acc     (cons #\Newline acc))
       (acc     (cons #\Newline acc)))
    (mv acc state)))

(defun write-topics (x acc state)
  (b* (((when (atom x))
        (mv acc state))
       ((mv acc state) (write-topic (car x) acc state)))
    (write-topics (cdr x) acc state)))

(acl2::defconsts (*output* state)
  (b* ((acc (str::revappend-chars
 "; acl2-doc.lisp - Documentation for the ACL2 Theorem Prover

; ACL2 Version 6.3 -- A Computational Logic for Applicative Common Lisp
; Copyright (C) 2013, Regents of the University of Texas

; This documentation was derived from the ACL2 system in October 2013, which
; was a descendent of ACL2 Version 1.9, Copyright (C) 1997 Computational Logic,
; Inc.  See the documentation topic NOTE-2-0.

; This program is free software; you can redistribute it and/or modify it under
; the terms of the LICENSE file distributed with ACL2.

; This program is distributed in the hope that it will be useful, but WITHOUT
; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
; FOR A PARTICULAR PURPOSE.  See the LICENSE file distributed with ACL2 for
; more details.

; Here are the original authors of this book.

; Written by:  Matt Kaufmann               and J Strother Moore
; email:       Kaufmann@cs.utexas.edu      and Moore@cs.utexas.edu
; Department of Computer Science
; University of Texas at Austin
; Austin, TX 78712 U.S.A.

; The ACL2 community is invited to contribute to this book by joining
; the acl2-books group at http://acl2-books.googlecode.com/.  If you
; edit this book, then please conform to its style, for example
; restricting to 79 columns and avoiding tab characters.  Please add
; additional instructions here as appropriate.  For general
; information about writing documentation, see the combined manual
; (for example: http://fv.centtech.com/acl2/latest/doc/), in
; particular the topic DEFXDOC and the related topics MARKUP and
; PREPROCESSOR.

(in-package \"ACL2\")
(include-book \"xdoc/top\" :dir :system)

" nil))
       ((mv acc state) (write-topics *fixed-topics* acc state)))
    (mv (str::rchars-to-string acc) state)))

(defttag :open-output-channel)

(acl2::defconsts state
  (b* (((mv channel state) (open-output-channel! "acl2-doc.lisp" :character state))
       ((unless channel)
        (er hard? 'convert "failed to open output.lsp")
        state)
       (state (princ$ *output* channel state))
       (state (close-output-channel channel state)))
    state))





;; (defun print-each-command (commands channel state) 
;;   (declare (xargs :mode :program))
;;   (b* (((when (atom commands))
;;         state)
;;        (state (fms "~x0~%" (list (cons #\0 (car commands)))
;;                    channel state nil)))
;;     (print-each-command (cdr commands) channel state)))

;; (b* (((mv channel state) (open-output-channel! "output.lsp" :character state))
;;      ((unless channel)
;;       (er hard? 'convert "failed to open output.lsp")
;;       state)
;;      (state (set-print-case :downcase state))
;;      (state (fms! "(in-package \"ACL2\")~%" nil channel state nil))
;;      (state (print-each-command *commands* channel state))
;;      (state (close-output-channel channel state)))
;;   state)


;; (xdoc::find-topic 'acl2::|A Flying Tour of ACL2|
;;                   xdoc::*acl2-ground-zero-topics*)

;; (trace$ (parse-symbol-name-part :entry (list :x x :n n :xl xl)))
;; (trace$ (parse-symbol :entry (list :x x :n n :xl xl)))

;; (PARSE-SYMBOL "ACL2-PC::UNDO" 0 13 'acl2::REWRITE (known-package-alist state) t)
