The Complete Computing Environment

Arroyo System Cache

LifeTechEmacsTopicsArcology

The Arroyo System Cache (aka arroyo-db) collects metadata that org-roam files store on the page as #+KEYWORD: value keyword properties. These are inserted into a SQLite database which can then be queried by system generators.

Most of this exists in one form or another. I plan to take heavy inspiration from vulpea and vino when re-assembling the parts I have in my Shared CCE Helpers into a library with a more normalized schema than the simple key-value pairs I was choosing to work with.

headmatter & db functions

(defcustom arroyo-db-location (expand-file-name "arroyo.db" user-emacs-directory)
  "File name of the Arroyo System Cache SQLite database.
Defaults to \"arroyo.db\" inside `user-emacs-directory'."
  :type 'file
  :group 'arroyo)

(defvar arroyo-db--schemata nil
  "A list of emacsql table schemas.
Each element is a list whose car is the table name and whose
remaining elements are the emacsql schema for that table.")
(defvar arroyo-db--connection (make-hash-table :test #'equal)
  "Hash table of connections to the `arroyo' database.
Keys are compared with `equal'; connections are stored under the
expanded value of `arroyo-db-location'.")

(defvar arroyo-db-update-functions ()
  "List of functions to run in the save hooks to update database tables.
Each function is called with no arguments by `arroyo-db-update-file'.")

Arroyo-db is largely cribbed from org-roam-db as that has proven to be a capable Hypermedia platform for org-mode and to some extent vino.

(require 'emacsql-sqlite)
(require 'dash)

(defun arroyo-db ()
  "Return a connection to the `arroyo' sqlite database.

When no live connection is cached for `arroyo-db-location', open
one and cache it.  If the database file did not exist beforehand,
the tables are created through `arroyo-db--init'."
  (let ((cached (arroyo-db--get-connection)))
    (unless (and cached (emacsql-live-p cached))
      ;; Remember whether the file is new *before* emacsql creates it.
      (let ((fresh-p (not (file-exists-p arroyo-db-location))))
        (make-directory (file-name-directory arroyo-db-location) t)
        (let ((conn (emacsql-sqlite arroyo-db-location)))
          ;; Do not ask about the sqlite subprocess when Emacs exits.
          (set-process-query-on-exit-flag (emacsql-process conn) nil)
          (puthash (expand-file-name arroyo-db-location)
                   conn
                   arroyo-db--connection)
          (if fresh-p
              (arroyo-db--init conn))))))
  (arroyo-db--get-connection))

(defun arroyo-db--init (db)
  "Create every table listed in `arroyo-db--schemata' on connection DB.
Any condition signalled while creating a table is swallowed.
Afterwards the value of `arroyo-db--version' is written with
PRAGMA user_version.  Everything runs inside one transaction."
  (message "Initializing Arroyo Database")
  (emacsql-with-transaction db
    (dolist (entry arroyo-db--schemata)
      (let ((table (car entry))
            (schema (cdr entry)))
        (condition-case nil
            (emacsql db [:create-table $i1 $S2] table schema)
          (t nil))))
    (let ((pragma (format "PRAGMA user_version = \"%s\""
                          (arroyo-db--version))))
      (emacsql db pragma))))

(defun arroyo-db--get-connection ()
  "Return the cached connection for `arroyo-db-location', or nil.
The lookup key is the expanded file name of the database."
  (let ((key (expand-file-name arroyo-db-location)))
    (gethash key arroyo-db--connection)))

(defun arroyo-db--close-all ()
  "Close every database connection cached in `arroyo-db--connection'.
Each cached connection is handed to `arroyo-db--close'.  Return nil."
  ;; `maphash' is a built-in, so this does not depend on subr-x
  ;; (home of `hash-table-values'), which this file never requires.
  (maphash (lambda (_location conn)
             (arroyo-db--close conn))
           arroyo-db--connection))

(defun arroyo-db--close (&optional db)
  "Close the database connection DB if it is still live.
When DB is nil, the connection cached for `arroyo-db-location' is
used instead."
  (let ((conn (or db (arroyo-db--get-connection))))
    (if (and conn (emacsql-live-p conn))
        (emacsql-close conn))))

(defun arroyo-db--upgrade-maybe (db version)
  "Rebuild the database behind DB when VERSION is stale.
VERSION is compared, as a string, against `arroyo-db--version'.
On a mismatch DB and the connection cached for
`arroyo-db-location' are closed, the database file is deleted and
`arroyo-db' is called to recreate it.  Return VERSION unchanged."
  ;; Deliberately NOT wrapped in `emacsql-with-transaction': the body
  ;; closes the connection and deletes its file, so the transaction's
  ;; final COMMIT would be sent to a dead connection.
  (unless (string-equal (format "%s" version) (arroyo-db--version))
    (arroyo-db--close db)
    ;; DB may not be the cached connection; make sure that one is
    ;; closed as well before its file disappears.
    (arroyo-db--close)
    (delete-file arroyo-db-location)
    (arroyo-db))
  version)

To build the DB on save, we use an after-save-hook:

(defun arroyo-db--update-on-save-h ()
  "Arrange for the current buffer to refresh the `arroyo-db' cache on save.
Only buffers for which `org-roam-file-p' is non-nil get
`arroyo-db-update-file' added to their buffer-local
`after-save-hook'."
  (and (org-roam-file-p)
       (add-hook 'after-save-hook #'arroyo-db-update-file nil 'local)))

(add-hook 'find-file-hook #'arroyo-db--update-on-save-h)
(defun arroyo-db-file-updated-p (file)
  "Return non-nil when FILE's recorded hash differs from its current hash.
The recorded hash is read from the `files' table of the Arroyo DB;
the current one is computed with `org-roam-db--file-hash'."
  (let* ((rows (arroyo-db-query [:select hash :from files :where (= file $s1)] file))
         (recorded (caar rows))
         (current (org-roam-db--file-hash file)))
    (not (equal recorded current))))

(defun arroyo-db-update-file-maybe (&optional file _no-require)
  "Update the cache for FILE unless its recorded hash is still current.
FILE defaults to the file visited by the current buffer's base
buffer.  _NO-REQUIRE is passed through to `arroyo-db-update-file'."
  (let ((target (or file (buffer-file-name (buffer-base-buffer)))))
    (if (arroyo-db-file-updated-p target)
        (arroyo-db-update-file target _no-require))))

(defun arroyo-db-update-file (&optional file _no-require)
  "Update the cache for FILE.
FILE defaults to the file visited by the current buffer's base
buffer.  Every function in `arroyo-db-update-functions' is called
with no arguments inside `org-roam-with-file' for that file.
_NO-REQUIRE is ignored."
  (let ((target (or file (buffer-file-name (buffer-base-buffer)))))
    (org-roam-with-file target nil
      (--each arroyo-db-update-functions
        (funcall it)))))

;; Disabled: this would run `arroyo-db-update-file' after every
;; `org-roam-db-update-file' call.
;(add-function :after (symbol-function 'org-roam-db-update-file) #'arroyo-db-update-file)
;; Run `arroyo-db-update-all-roam-files' after each `org-roam-db-sync'.
;; `:after' advice is called with the same arguments as the advised function.
(add-function :after (symbol-function 'org-roam-db-sync) #'arroyo-db-update-all-roam-files)

(defun arroyo-db-update-all-roam-files (&optional force)
  "Refresh the Arroyo cache for every file in `org-roam-list-files'.
With non-nil FORCE (interactively, a prefix argument), delete the
database file at `arroyo-db-location' before reconnecting.  Each
file is passed to `arroyo-db-update-file-maybe'; afterwards
`arroyo-db-cull-deleted-files' is run."
  (interactive "P")
  ;; Use org-roam's GC threshold for the duration of the run.
  (let ((gc-cons-threshold org-roam-db-gc-threshold))
    (arroyo-db--close) ;; Force a reconnect
    ;; The connection is closed above, so the file can be removed here.
    (when force (delete-file arroyo-db-location))
    (arroyo-db) ;; To initialize the database, no-op if already initialized
    (dolist-with-progress-reporter (file (org-roam-list-files))
        "Processing all files..."
      (arroyo-db-update-file-maybe file))
    (arroyo-db-cull-deleted-files)))

(defun arroyo-db-cull-deleted-files ()
  "Remove cached rows for files that no longer exist on disk.
File names are gathered from both the `keywords' and the `files'
tables; rows belonging to any name for which `file-exists-p' is
nil are deleted from both tables.  Return nil."
  (interactive)
  (let ((missing (->> (append (arroyo-db-query [:select file :from keywords])
                              (arroyo-db-query [:select file :from files]))
                      (-map #'car)
                      ;; One row per keyword: check each file only once.
                      (-distinct)
                      (-remove #'file-exists-p))))
    (when missing
      (let ((gone (apply #'vector missing)))
        (arroyo-db-query [:delete :from keywords :where (in file $v1)] gone)
        ;; Drop the recorded hash too.  Otherwise a file recreated with
        ;; identical content would look unchanged to
        ;; `arroyo-db-file-updated-p' and its keywords would never be
        ;; re-inserted.
        (arroyo-db-query [:delete :from files :where (in file $v1)] gone)
        nil))))

The query interface is just emacsql; higher-level things will be built on that.

(define-error 'emacsql-constraint "SQL constraint violation")
(defun arroyo-db-query (sql &rest args)
  "Execute SQL against the arroyo database, passing ARGS along.
SQL may be an emacsql vector expression or a plain string.  The
connection comes from `arroyo-db', which opens it on demand."
  (let ((connection (arroyo-db)))
    (apply #'emacsql connection sql args)))

(defun arroyo-db-get (keyword &optional file)
  "Look up cached values of the arroyo KEYWORD.
With FILE, return the list of values recorded for KEYWORD in FILE.
Without FILE, return one (FILE VALUE) row for every cached
occurrence of KEYWORD."
  (cond
   (file
    (let ((rows (arroyo-db-query [:select value :from keywords
                                  :where (= keyword $s1)
                                  :and (= file $s2)]
                                 keyword file)))
      (-map #'car rows)))
   (t
    (arroyo-db-query [:select [file value] :from keywords
                      :where (= keyword $s1)]
                     keyword))))

(defun arroyo-db-by-keyword (keyword value)
  "Return the files whose cached KEYWORD equals VALUE.
One file name is returned per matching row of the `keywords' table."
  (->> (arroyo-db-query [:select file :from keywords
                         :where (= keyword $s1)
                         :and (= value $s2)]
                        keyword value)
       (-map #'car)))

(defun arroyo--files-for-role (role)
  "Return an alist of (FILE . TITLE) for files assigned to ROLE.
A file qualifies when its ARROYO_NIXOS_ROLE keyword equals ROLE
and it has at least one ARROYO_NIXOS_MODULE keyword.  TITLE is
looked up in the org-roam database."
  (let* ((in-role (arroyo-db-by-keyword "ARROYO_NIXOS_ROLE" role))
         (with-module (-filter (lambda (file)
                                 (arroyo-db-get "ARROYO_NIXOS_MODULE" file))
                               in-role)))
    (-map (lambda (file)
            (cons file (arroyo-db--get-file-title-from-org-roam file)))
          with-module)))

(defun arroyo-db--get-file-title-from-org-roam (file)
  "Return the title of the level-0 org-roam node stored for FILE.
The value is the first column of the first row returned by the
org-roam database, or nil when there is no such row."
  (let ((rows (org-roam-db-query
               [:select title :from nodes :where (= level 0) :and (= file $s1)]
               file)))
    (caar rows)))

CANCELLED Arroyo DB uses hashes, not incremental versioning

This list can be extended with other functions which take an accumulator and hash their own state into it. See arroyo-db--hash below for an example, or arroyo-db--keywords-hash, etc. The goal with this methodology is to be able to invalidate the cache when the configuration changes. This might be a bad idea in the case that the configuration is invalid and I need the working configuration to restore it. Secure Backup Infrastructure or at least ZFS maybe saves me; I need to take advantage of that more often!!

(defvar arroyo-db-hash-input-functions
  '(arroyo-db--hash)
  "A list of functions which take an accumulator and hash their
  own configuration into the accumulator.
Each function is called with the accumulator as its only argument
and its return value becomes the next accumulator; see
`arroyo-db--version'.")
(defun arroyo-db--version ()
  "Compute the Arroyo DB version by chaining the hash input functions.
Starting from the empty string, each function in
`arroyo-db-hash-input-functions' is called in order with the
accumulator so far; the last result is returned."
  (-reduce-from (lambda (acc hash-fn)
                  (funcall hash-fn acc))
                ""
                arroyo-db-hash-input-functions))

(defun arroyo-db--hash (acc)
  "Mix the database schemata into the arroyo version hash.
ACC is the accumulator produced by the previous hash input
function.  Return the SHA-256 hex digest of ACC followed by the
printed form of `arroyo-db--schemata', so that a schema change
yields a different version and can invalidate the DB."
  ;; The keyword list is deliberately not used here: it is mixed in
  ;; separately by `arroyo-db--keywords-hash'.
  (secure-hash 'sha256 (format "%s%S" acc arroyo-db--schemata)))

NEXT still need a hash/cache-key system

auto-tangle files that are modified?

NEXT the PRAGMA user_version is an integer, need to put the hash string in a better place.

NEXT this should probably have a minor mode eventually to manage the hooks. also need rename-file and delete-file advice…

NEXT deferred upgrade on schema change in (arroyo-db) should block future ops

org-roam #+PROPERTY keyword caching

This keywords table provides a simple key-value store which can be further derived in to tables for smarter applications like the Arroyo Emacs Generator, etc. Included here is a hash function.

(defcustom arroyo-db-keywords '("BIRTHDAY" "PRONOUNS" "LOCATION")
  "Keyword properties which will be stored in the arroyo db keywords table.
Each element is the name of a file-level Org keyword or property."
  :type '(repeat string)
  :group 'arroyo)

(defun arroyo-db--keywords-hash (acc)
  "Fold `arroyo-db-keywords' into the version accumulator ACC.
ACC and the keyword list are joined with `append', printed with
`format' and the SHA-256 hex digest of that text is returned."
  (let ((material (append acc arroyo-db-keywords)))
    (secure-hash 'sha256 (format "%s" material))))
(add-to-list 'arroyo-db-hash-input-functions #'arroyo-db--keywords-hash)

It's a simple file-level-properties key/value table shaped like:

;; Register the `keywords' table: one row per (file, keyword, value)
;; triple; all three columns are NOT NULL.
(add-to-list 'arroyo-db--schemata
             '(keywords
               [(file :not-null)
                (keyword :not-null)
                (value :not-null)]))

And we cache the files' hash as well to compare against the org-roam database.

;; Register the `files' table: the hash recorded for each cached file;
;; both columns are NOT NULL.
(add-to-list 'arroyo-db--schemata
             '(files
               [(file :not-null)
                (hash :not-null)]))
(defun arroyo-db--record-file-hash (&optional update-p)
  "Copy the current buffer's file hash from org-roam into the Arroyo DB.
The (file hash) rows org-roam has recorded for the visited file
replace whatever the Arroyo `files' table held for it.  When
org-roam has no row for the file, the Arroyo row is only removed.
UPDATE-P is accepted for compatibility and ignored."
  (ignore update-p)
  (let* ((file (buffer-file-name))
         (rows (org-roam-db-query [:select [file hash] :from files :where (= file $s1)] file)))
    (arroyo-db-query [:delete :from files :where (= file $s1)]
                     file)
    ;; emacsql rejects an empty :values list, so only insert when
    ;; org-roam actually knows the file.
    (when rows
      (arroyo-db-query [:insert :into files :values $v1]
                       (mapcar #'vconcat rows)))))
(add-to-list 'arroyo-db-update-functions #'arroyo-db--record-file-hash)

(defun arroyo-db--extract-keywords (&optional file-path)
  "Extract the props listed in `arroyo-db-keywords' from FILE-PATH.
FILE-PATH defaults to the file visited by the current buffer.
Return the (KEY . VALUE) pairs found by
`arroyo-db--extract-global-props' whose value is non-nil, or nil
when `arroyo-db-keywords' is empty."
  (let ((path (or file-path (buffer-file-name))))
    (save-excursion
      (org-roam-with-file path nil
        (and arroyo-db-keywords
             (--filter (cdr it)
                       (arroyo-db--extract-global-props arroyo-db-keywords)))))))

(defun arroyo-db--insert-keywords (&optional update-p)
  "Replace the cached keyword rows for the current buffer's file.
Existing rows for the file are always removed from the Arroyo
`keywords' table first; then one row is inserted per (KEY . VALUE)
pair returned by `arroyo-db--extract-keywords'.
UPDATE-P is accepted for compatibility and ignored.
Return the number of rows inserted."
  (ignore update-p)
  (let* ((file (buffer-file-name))
         (rows (--map (vector file (car it) (cdr it))
                      (arroyo-db--extract-keywords file))))
    (arroyo-db-query [:delete :from keywords
                      :where (= file $s1)]
                     file)
    ;; emacsql rejects an empty :values list.
    (when rows
      (arroyo-db-query
       [:insert :into keywords
        :values $v1]
       rows))
    (length rows)))
(add-to-list 'arroyo-db-update-functions #'arroyo-db--insert-keywords)

functions from org-roam v1 to extract file-level properties

(defun arroyo-db--collect-keywords (keywords)
  "Collect all Org KEYWORDS in the current buffer.
KEYWORDS is a list of keyword names.  Return an alist whose
elements are (NAME . VALUES), VALUES being the list of values
found for NAME.  `org-collect-keywords' is used when available;
otherwise the buffer is parsed with org-element, in which case
names without any occurrence are returned with a nil VALUES."
  (if (fboundp 'org-collect-keywords)
      (org-collect-keywords keywords)
    (let ((tree (org-element-parse-buffer))
          result)
      (dolist (name keywords result)
        ;; `org-element-map' takes positional optional arguments, so
        ;; no `:first-match' keyword here: every match is collected.
        (push (cons name
                    (org-element-map tree 'keyword
                      (lambda (kw)
                        (when (string-equal (org-element-property :key kw) name)
                          (org-element-property :value kw)))))
              result)))))

(defun arroyo-db--extract-global-props-keyword (keywords)
  "Extract KEYWORDS from the current Org buffer.
Return a flat list of (KEY . VALUE) pairs, one per value collected
by `arroyo-db--collect-keywords', in reverse order of collection."
  (nreverse
   (-mapcat (lambda (entry)
              (let ((key (car entry)))
                (-map (lambda (value) (cons key value))
                      (cdr entry))))
            (arroyo-db--collect-keywords keywords))))

(defun arroyo-db--extract-global-props-drawer (props)
  "Extract PROPS from the file-level property drawer in Org.
Each name in PROPS is looked up with `org-entry-get' at buffer
position 1.  Return (NAME . VALUE) pairs for the names that have
a value, most recently found first."
  (org-with-point-at 1
    (let (found)
      (dolist (name props)
        (let ((value (org-entry-get (point) name)))
          (if value
              (push (cons name value) found))))
      found)))

(defun arroyo-db--extract-global-props (props)
  "Extract PROPS from the current Org buffer.
Pairs found as Org keywords come first in the returned list,
followed by those from the file-level property drawer (if any),
so a lookup that takes the first match prefers the keyword."
  (let* ((from-keywords (arroyo-db--extract-global-props-keyword props))
         (from-drawer (arroyo-db--extract-global-props-drawer props)))
    (append from-keywords from-drawer)))

Footmatter

This is arroyo-db.el and can be (require 'arroyo-db)'d

(provide 'arroyo-db)