diff --git a/project.clj b/project.clj index eb5ac35..8b2c7b4 100644 --- a/project.clj +++ b/project.clj @@ -5,7 +5,9 @@ :url "http://www.eclipse.org/legal/epl-v10.html"} :main datomic.codeq.core :plugins [[lein-tar "1.1.0"]] + :repositories [["jgit-repository" "https://repo.eclipse.org/content/groups/releases/"]] :dependencies [[com.datomic/datomic-free "0.8.4020.24"] [commons-codec "1.7"] - [org.clojure/clojure "1.5.1"]] + [org.clojure/clojure "1.5.1"] + [org.eclipse.jgit/org.eclipse.jgit "3.0.3.201309161630-r"]] :source-paths ["src" "examples/src"]) diff --git a/src/datomic/codeq/core.clj b/src/datomic/codeq/core.clj index ef83b45..03ba6ad 100644 --- a/src/datomic/codeq/core.clj +++ b/src/datomic/codeq/core.clj @@ -11,10 +11,12 @@ [clojure.java.io :as io] [clojure.set] [clojure.string :as string] - [datomic.codeq.util :refer [index->id-fn tempid?]] [datomic.codeq.analyzer :as az] + [datomic.codeq.git :as git] [datomic.codeq.analyzers.clj]) - (:import java.util.Date) + (:use datomic.codeq.util) + (:import java.util.Date + org.eclipse.jgit.lib.Repository) (:gen-class)) (set! *warn-on-reflection* true) @@ -250,188 +252,131 @@ :db.install/_attribute :db.part/db} ]) -(defn ^java.io.Reader exec-stream - [^String cmd] - (-> (Runtime/getRuntime) - (.exec cmd) - .getInputStream - io/reader)) (defn ensure-schema [conn] (or (-> conn d/db (d/entid :tx/commit)) @(d/transact conn schema))) -;;example commit - git cat-file -p -;;tree d81cd432f2050c84a3d742caa35ccb8298d51e9d -;;author Rich Hickey 1348842448 -0400 -;;committer Rich Hickey 1348842448 -0400 - -;; or - -;;tree ba63180c1d120b469b275aef5da479ab6c3e2afd -;;parent c3bd979cfe65da35253b25cb62aad4271430405c -;;maybe more parents -;;author Rich Hickey 1348869325 -0400 -;;committer Rich Hickey 1348869325 -0400 -;;then blank line -;;then commit message - - -;;example tree -;;100644 blob ee508f768d92ba23e66c4badedd46aa216963ee1 .gitignore -;;100644 blob b60ea231eb47eb98395237df17550dee9b38fb72 README.md -;;040000 tree bcfca612efa4ff65b3eb07f6889ebf73afb0e288 doc -;;100644 blob 813c07d8cd27226ddd146ddd1d27fdbde10071eb epl-v10.html -;;100644 blob f8b5a769bcc74ee35b9a8becbbe49d4904ab8abe project.clj -;;040000 tree 6b880666740300ac57361d5aee1a90488ba1305c src -;;040000 tree 407924e4812c72c880b011b5a1e0b9cb4eb68cfa test - -;; example git remote origin -;;RichMacPro:codeq rich$ git remote show -n origin -;;* remote origin -;; Fetch URL: https://github.com/Datomic/codeq.git -;; Push URL: https://github.com/Datomic/codeq.git -;; HEAD branch: (not queried) - -(defn get-repo-uri - "returns [uri name]" - [] - (with-open [s (exec-stream (str "git remote show -n origin"))] - (let [es (line-seq s) - ^String line (second es) - uri (subs line (inc (.lastIndexOf line " "))) - noff (.lastIndexOf uri "/") - noff (if (not (pos? noff)) (.lastIndexOf uri ":") noff) - name (subs uri (inc noff)) - _ (assert (pos? (count name)) "Can't find remote origin") - name (if (.endsWith name ".git") (subs name 0 (.indexOf name ".")) name)] - [uri name]))) - -(defn dir - "Returns [[sha :type filename] ...]" - [tree] - (with-open [s (exec-stream (str "git cat-file -p " tree))] - (let [es (line-seq s)] - (mapv #(let [ss (string/split ^String % #"\s")] - [(nth ss 2) - (keyword (nth ss 1)) - (subs % (inc (.indexOf ^String % "\t")) (count %))]) - es)))) - -(defn commit - [[sha _]] - (let [trim-email (fn [s] (subs s 1 (dec (count s)))) - dt (fn [ds] (Date. (* 1000 (Integer/parseInt ds)))) - [tree parents author committer msg] - (with-open [s (exec-stream (str "git cat-file -p " sha))] - (let [lines (line-seq s) - slines (mapv #(string/split % #"\s") lines) - tree (-> slines (nth 0) (nth 1)) - [plines xs] (split-with #(= (nth % 0) "parent") (rest slines))] - [tree - (seq (map second plines)) - (vec (reverse (first xs))) - (vec (reverse (second xs))) - (->> lines - (drop-while #(not= % "")) - rest - (interpose "\n") - (apply str))]))] - {:sha sha - :msg msg - :tree tree - :parents parents - :author (trim-email (author 2)) - :authored (dt (author 1)) - :committer (trim-email (committer 2)) - :committed (dt (committer 1))})) +(defn authors-tx-data + [db {:keys [author committer]}] + (let [tempid? map? + email->id (index->id-fn db :email/address) + authorid (email->id author) + committerid (email->id committer) + tx-data (cond-> [] + ;;record author's email if new + (tempid? authorid) + (conj [:db/add authorid :email/address author]) + ;;record committer's email if new and is + ;;distinct from the author's email + (and (not= committer author) (tempid? committerid)) + (conj [:db/add committerid :email/address committer]))] + [authorid committerid tx-data])) + + +(defn commit-node-tx-data + [db repoid root-nodeid {:keys [sha msg parents authored committed] :as commit}] + (let [commitid (d/tempid :db.part/user) + [authorid committerid author-tx-data] (authors-tx-data db commit)] + (into author-tx-data + [[:db/add repoid :repo/commits commitid] + {:db/id (d/tempid :db.part/tx) + :tx/commit commitid + :tx/op :import} + (cond-> {:db/id commitid + :git/type :commit + :commit/tree root-nodeid + :git/sha sha + :commit/author authorid + :commit/authoredAt authored + :commit/committer committerid + :commit/committedAt committed} + msg + (assoc :commit/message msg) + parents + (assoc :commit/parents + (mapv (fn [p] + (if-let [id (index-get-id db :git/sha p)] + id + (throw (ex-info "Parent not previously imported" + {:sha sha :parent p})))) + parents)))]))) (defn commit-tx-data - [db repo repo-name {:keys [sha msg tree parents author authored committer committed] :as commit}] + [db repo repoid repo-name {:keys [sha msg tree parents authored committed] :as commit}] (let [tempid? map? ;;todo - better pred sha->id (index->id-fn db :git/sha) - email->id (index->id-fn db :email/address) filename->id (index->id-fn db :file/name) - authorid (email->id author) - committerid (email->id committer) - cid (d/tempid :db.part/user) - tx-data (fn f [inpath [sha type filename]] - (let [path (str inpath filename) - id (sha->id sha) - filenameid (filename->id filename) - pathid (filename->id path) - nodeid (or (and (not (tempid? id)) - (not (tempid? filenameid)) - (ffirst (d/q '[:find ?e :in $ ?filename ?id - :where [?e :node/filename ?filename] [?e :node/object ?id]] - db filenameid id))) - (d/tempid :db.part/user)) - newpath (or (tempid? pathid) (tempid? nodeid) - (not (ffirst (d/q '[:find ?node :in $ ?path - :where [?node :node/paths ?path]] - db pathid)))) - data (cond-> [] - (tempid? filenameid) (conj [:db/add filenameid :file/name filename]) - (tempid? pathid) (conj [:db/add pathid :file/name path]) - (tempid? nodeid) (conj {:db/id nodeid :node/filename filenameid :node/object id}) - newpath (conj [:db/add nodeid :node/paths pathid]) - (tempid? id) (conj {:db/id id :git/sha sha :git/type type})) - data (if (and newpath (= type :tree)) - (let [es (dir sha)] - (reduce (fn [data child] - (let [[cid cdata] (f (str path "/") child) - data (into data cdata)] - (cond-> data - (tempid? id) (conj [:db/add id :tree/nodes cid])))) - data es)) - data)] - [nodeid data])) - [treeid treedata] (tx-data nil [tree :tree repo-name]) - tx (into treedata - [[:db/add repo :repo/commits cid] - {:db/id (d/tempid :db.part/tx) - :tx/commit cid - :tx/op :import} - (cond-> {:db/id cid - :git/type :commit - :commit/tree treeid - :git/sha sha - :commit/author authorid - :commit/authoredAt authored - :commit/committer committerid - :commit/committedAt committed - } - msg (assoc :commit/message msg) - parents (assoc :commit/parents - (mapv (fn [p] - (let [id (sha->id p)] - (assert (not (tempid? id)) - (str "Parent " p " not previously imported")) - id)) - parents)))]) - tx (cond-> tx - (tempid? authorid) - (conj [:db/add authorid :email/address author]) - - (and (not= committer author) (tempid? committerid)) - (conj [:db/add committerid :email/address committer]))] - tx)) - -(defn commits - "Returns log as [[sha msg] ...], in commit order. commit-name may be nil - or any acceptable commit name arg for git log" - [commit-name] - (let [commits (with-open [s (exec-stream (str "git log --pretty=oneline --date-order --reverse " commit-name))] - (mapv - #(vector (subs % 0 40) - (subs % 41 (count %))) - (line-seq s)))] - commits)) + commitid (d/tempid :db.part/user) + ;;find a node by object and filename id + check-for-node + (fn [object-id filename-id] + (some #(let [{eid :e} %] + (if (seq (d/datoms db :eavt eid :node/filename filename-id)) + eid)) + (d/datoms db :vaet object-id :node/object))) + + walker-fn + (fn [{:keys [sha type path filename parent]}] + (let [;;lookup id for file/tree object by tree sha + objid (sha->id sha) + ;;lookup id for file name + filenameid (filename->id filename) + ;;lookup id for complete path + pathid (filename->id path) + ;;lookup id for tree node + ;;new if either tree sha or filename are new + ;;i.e. file with new content + nodeid (or (and (not (tempid? objid)) + (not (tempid? filenameid)) + (check-for-node objid filenameid)) + (d/tempid :db.part/user)) + ;;path is new if: 1. path name is new, or 2. path name + ;;exists but tree node is new (file name and sha are + ;;unique), or 3. path name and tree node both exist + ;;but the former is not already a path of the latter. + newpath (or (tempid? pathid) (tempid? nodeid) + (every? #(not= nodeid (:e %)) + (d/datoms db :vaet pathid :node/paths))) + data (cond-> + [] + ;;record file name if new + (tempid? filenameid) + (conj [:db/add filenameid :file/name filename]) + ;;record path name if new + (tempid? pathid) + (conj [:db/add pathid :file/name path]) + ;;record tree node if new + (tempid? nodeid) + (conj {:db/id nodeid + :node/filename filenameid + :node/object objid}) + ;;link tree node to new paths + newpath + (conj [:db/add nodeid :node/paths pathid]) + ;;link new tree node to parent + (and (tempid? nodeid) parent) + (conj [:db/add parent :tree/nodes nodeid]) + ;;record the sha of file/tree objects + (tempid? objid) + (conj {:db/id objid :git/sha sha :git/type type}))] + ;;emit the tree node, its object, + ;;and the accumulated tx data + ;;indicating if the tree node is new + {:node-id nodeid + :object-id objid + :new-path newpath + :data data})) + + [root-nodeid treedata] (git/deep-tree-walk repo repo-name tree walker-fn)] + (into treedata + (commit-node-tx-data db repoid root-nodeid commit)))) + (defn unimported-commits - [db commit-name] + [db repo commit-name] (let [imported (into {} (d/q '[:find ?sha ?e :where @@ -439,7 +384,8 @@ [?tx :tx/commit ?e] [?e :git/sha ?sha]] db))] - (pmap commit (remove (fn [[sha _]] (imported sha)) (commits commit-name))))) + (pmap git/extract-commit-info + (git/walk-all-commits repo imported commit-name)))) (defn ensure-db [db-uri] @@ -449,28 +395,29 @@ conn)) (defn import-git - [conn repo-uri repo-name commits] + [conn repo commits] ;;todo - add already existing commits to new repo if it includes them - (println "Importing repo:" repo-uri "as:" repo-name) (let [db (d/db conn) - repo + [repo-uri repo-name] (git/get-origin-uri repo) + repoid (or (ffirst (d/q '[:find ?e :in $ ?uri :where [?e :repo/uri ?uri]] db repo-uri)) (let [temp (d/tempid :db.part/user) tx-ret @(d/transact conn [[:db/add temp :repo/uri repo-uri]]) - repo (d/resolve-tempid (d/db conn) (:tempids tx-ret) temp)] + repoid (d/resolve-tempid (d/db conn) (:tempids tx-ret) temp)] (println "Adding repo" repo-uri) - repo))] + repoid))] + (println "Importing repo:" repo-uri "as:" repo-name) (doseq [commit commits] (let [db (d/db conn)] (println "Importing commit:" (:sha commit)) - (d/transact conn (commit-tx-data db repo repo-name commit)))) + @(d/transact conn (commit-tx-data db repo repoid repo-name commit)))) (d/request-index conn) (println "Import complete!"))) (def analyzers [(datomic.codeq.analyzers.clj/impl)]) (defn run-analyzers - [conn] + [conn repo] (println "Analyzing...") (doseq [a analyzers] (let [aname (az/keyname a) @@ -510,8 +457,13 @@ ;;analyze them (println "analyzing file:" f " - sha: " (:git/sha (d/entity db f))) (let [db (d/db conn) - src (with-open [s (exec-stream (str "git cat-file -p " (:git/sha (d/entity db f))))] - (slurp s)) + rawBytes (->> f + (d/entity db) + :git/sha + (.resolve ^Repository repo) + (.open ^Repository repo) + (.getBytes)) + src (String. rawBytes "UTF-8") adata (try (az/analyze a db f src) (catch Exception ex @@ -527,12 +479,11 @@ (defn main [& [db-uri commit]] (if db-uri - (let [conn (ensure-db db-uri) - [repo-uri repo-name] (get-repo-uri)] - ;;(prn repo-uri) - (import-git conn repo-uri repo-name (unimported-commits (d/db conn) commit)) - (run-analyzers conn)) - (println "Usage: datomic.codeq.core db-uri [commit-name]"))) + (let [conn (ensure-db db-uri)] + (with-open [repo (git/open-existing-repo)] + (import-git conn repo (unimported-commits (d/db conn) repo commit)) + (run-analyzers conn repo))) + (println "Usage: datomic.codeq.core db-uri [commit-name]"))) (defn -main [& args] @@ -572,8 +523,13 @@ ;;analyze them (println \"analyzing file:\" f) (let [db (d/db conn) - s (with-open [s (exec-stream (str \"git cat-file -p \" (:git/sha (d/entity db f))))] - (slurp s)) + rawBytes (->> f + (d/entity db) + :git/sha + (.resolve repo) + (.open repo) + (.getBytes)) + src (String. rawBytes \"UTF-8\") adata (az/analyze a db s)] (d/transact conn (conj adata {:db/id (d/tempid :db.part/tx) diff --git a/src/datomic/codeq/git.clj b/src/datomic/codeq/git.clj new file mode 100644 index 0000000..1243663 --- /dev/null +++ b/src/datomic/codeq/git.clj @@ -0,0 +1,168 @@ + +(ns datomic.codeq.git + (:import [org.eclipse.jgit.lib FileMode ObjectLoader Ref Repository RepositoryBuilder] + [org.eclipse.jgit.revwalk RevCommit RevSort RevTree RevWalk] + org.eclipse.jgit.revwalk.filter.RevFilter + org.eclipse.jgit.treewalk.TreeWalk + org.eclipse.jgit.storage.file.FileRepositoryBuilder)) + + +(set! *warn-on-reflection* true) + + +(defn ^Repository open-existing-repo + "Open an exisiting git repository by + scanning GIT_* environment variables + and scanning up the file system tree." + [] + (.. (FileRepositoryBuilder.) + (readEnvironment) + (findGitDir) + (build))) + + +(defn get-origin-uri + "Lookup the uri for the 'origin' remote, and + extract the repository name. Returns [uri name]." + [^Repository repo] + (if-let [^String uri (.. repo (getConfig) (getString "remote" "origin" "url"))] + (let [noff (.lastIndexOf uri "/") + noff (if (not (pos? noff)) (.lastIndexOf uri ":") noff) + name (subs uri (inc noff)) + _ (assert (pos? (count name)) "Can't find remote origin") + name (if (.endsWith name ".git") (subs name 0 (.indexOf name ".")) name)] + [uri name]) + (throw (ex-info "No remote 'origin' configured for this repository." + {:remotes (.. repo (getConfig) (getSubsections "remote"))})))) + + +(defn walk-all-commits + "Walk all commits in reverse topological order. + + repo - the repository to walk + + imported-commits - a (possibily empty) map of {sha commitid ...} + + rev-str - an optional git revision string + + Returns a lazy sequence of RevCommit objects. The walk will + efficiently skip over SHAs that are keys in the imported-commits map." + [^Repository repo + imported-commits + rev-str] + (let [rev-walk (RevWalk. repo) + commit-id + (if rev-str + (or (.resolve repo ^String rev-str) + (throw (ex-info (str "Can't resolve git revision string." rev-str) + {:revision-string rev-str}))) + (.. repo (getRef "refs/heads/master") (getObjectId))) + rev-commit (.parseCommit rev-walk commit-id) + rev-filter + (proxy [RevFilter] [] + (clone [] this) + (include [rev-walk rev-commit] + (let [sha (.getName ^RevCommit rev-commit) + incl (nil? (imported-commits sha))] + (when-not incl + (println "Skipping commit: " sha)) + incl)) + (requiresCommitBody [] false))] + (seq (doto rev-walk + (.markStart rev-commit) + (.setRevFilter rev-filter) + (.sort RevSort/TOPO true) + (.sort RevSort/REVERSE true))))) + + +(defn extract-commit-info + "Returns a map of information extracted from a RevCommit object." + [^RevCommit commit] + (let [author (.getAuthorIdent commit) + committer (.getCommitterIdent commit)] + {:sha (.getName commit) + :msg (.getFullMessage commit) + :tree (.. commit (getTree) (getName)) + :parents (->> commit + (.getParents) + (map #(.getName ^RevCommit %))) + :author (.getEmailAddress author) + :authored (.getWhen author) + :committer (.getEmailAddress committer) + :committed (.getWhen committer)})) + + +(defn deep-tree-walk + "Walk over the entire tree of repository repo with name repo-name + starting from the point identitfied by tree-sha, using the function + tree-walker to produce transaction data. + + Returns the root tree node id along with the accumulation of + transaction data produced by calling tree-walker on each node of + the tree walk. + + The tree walker function is given a map + {:sha :type :path :filename :parent} + Where the :type is :tree or :blob. :parent is nil at the root of + the tree walk. The tree walker function must return a map + {:node-id :object-id :new-path :data} + Which contains the transaction data along with the tree node id + and object id for linking the node and object. Also the boolean + :new-path indicates if the path that the walker has processed is + new. If so, deep-tree-walk will step into subtrees." + [^Repository repo + repo-name + ^String tree-sha + tree-walker] + (let [;;resolve tree-sha to a revision tree + rev-tree (->> tree-sha + (.resolve repo) + (.parseTree (RevWalk. repo))) + ;;set revision tree as starting point for tree walk + tree-walk (doto (TreeWalk. repo) (.addTree rev-tree)) + ;;create a root tree node from the repository name + {root-nodeid :node-id root-treeid :object-id new-root :new-path seed-data :data} + (tree-walker {:sha tree-sha :type :tree + :path repo-name :filename repo-name + :parent nil})] + (if-not new-root + ;;if root node is not new, then there is nothing to walk + [root-nodeid seed-data] + (loop [stack (list root-treeid) + depth (.getDepth tree-walk) + tx-data seed-data] + (if-not (.next tree-walk) + [root-nodeid tx-data] + (let [curr-id (.getObjectId tree-walk 0) + sha (.getName curr-id) + path (str repo-name "/" (.getPathString tree-walk)) + filename (.getNameString tree-walk)] + (cond + ;;tree walk is pointing at a tree to step into + (.isSubtree tree-walk) + (let [{:keys [object-id data new-path]} + (tree-walker {:sha sha :type :tree + :path path :filename filename + :parent (peek stack)})] + (if new-path + ;;enter subtree only if it's a new path + (do (.enterSubtree tree-walk) + (recur (conj stack object-id) (.getDepth tree-walk) + (into tx-data data))) + ;;else skip over it + (recur stack depth (into tx-data data)))) + ;;depth has decrease so we must have popped out of a subtree + (< (.getDepth tree-walk) depth) + (let [new-depth (.getDepth tree-walk) + new-stack (seq (drop (- depth new-depth) stack)) + {:keys [data]} (tree-walker {:sha sha :type :blob + :path path :filename filename + :parent (peek new-stack)})] + (recur new-stack new-depth (into tx-data data))) + ;;else continue at same depth with another blob + :else + (let [{:keys [data]} + (tree-walker {:sha sha :type :blob + :path path :filename filename + :parent (peek stack)})] + (recur stack depth (into tx-data data)))))))))) diff --git a/test/datomic/codeq/core_test.clj b/test/datomic/codeq/core_test.clj index a5b80bf..31bc30e 100644 --- a/test/datomic/codeq/core_test.clj +++ b/test/datomic/codeq/core_test.clj @@ -1,7 +1,390 @@ (ns datomic.codeq.core-test (:use clojure.test - datomic.codeq.core)) + datomic.codeq.core + datomic.codeq.util + datomic.codeq.test-datomic-util + datomic.codeq.test-git-util) + (:require [datomic.codeq.git :as git] + [datomic.api :as d] + [clojure.java.io :as io])) -(deftest a-test - (testing "FIXME, I fail." - (is (= 1 1)))) \ No newline at end of file + +(def ^:dynamic *conn* + "A dynamic var to hold the Datomic connection." + nil) + +(def ^:dynamic *git* + "A dynamic var to hold the GitPorcelain API." + nil) + +(def ^:dynamic *repo* + "A dynamic var to hold the Git repository." + nil) + +(def ^:dynamic *repo-dir* + "A dynamic var to hold a File object to the repository directory." + nil) + + +(defn datomic-fixture + [f] + (let [uri (str "datomic:mem://" (d/squuid))] + (d/create-database uri) + (let [c (d/connect uri)] + @(d/transact c schema) + (binding [*conn* c] + (f))) + (d/delete-database uri))) + +(defn git-fixture + [f] + (let [d (create-temp-dir) + [g r] (init-repo d)] + (binding [*git* g + *repo* r + *repo-dir* d] + (f)))) + +(use-fixtures :each (compose-fixtures datomic-fixture git-fixture)) + + +(defmacro is-only + [x coll] + `(do + (is (= 1 (count ~coll))) + (is (= ~x (first ~coll))))) + +(defmacro is-coll + [coll-expected coll-found] + `(let [c# ~coll-found] + (do + (is (= (count ~coll-expected) + (count c#))) + (are [e#] + (some #(= e# %) c#) + ~@coll-expected)))) + + +(deftest test-extract-and-import-commit + (testing "creating, extracting, and importing a commit:" + (let [commit (do + (spit (io/file *repo-dir* "a.txt") "contents of a\n") + (add-file-to-index *git* "a.txt") + (git/extract-commit-info + (git-commit-all *git* "added a.txt\n")))] + + (testing "extract the map of commit information" + (is (= "author@example.org" + (:author commit))) + (is (= "committer@example.org" + (:committer commit))) + (is (empty? (:parents commit))) + (is (= "added a.txt\n" + (:msg commit)))) + + (testing "importing the commit" + (let [repoid (transact-repo-uri *conn* "test/.git") + {db :db-after} + @(d/transact *conn* + (commit-tx-data (d/db *conn*) *repo* repoid "test" commit)) + root-tree-node + (get-root-tree-node db (:sha commit)) + children + (get-tree-node-children root-tree-node)] + (is (= 1 (count children))) + (let [node (first children)] + (is (= "a.txt" (node-name node))) + (is (= '("test/a.txt") (node-paths node))))))))) + + +(deftest test-file-append + (testing "a linear history of two commits, where a file is updated:" + (let [f (io/file *repo-dir* "a.txt") + c1 (do + (spit f "line 1\n") + (add-file-to-index *git* "a.txt") + (git/extract-commit-info + (git-commit-all *git* "added a.txt"))) + c2 (do + (spit f "line 2\n" :append true) + (git/extract-commit-info + (git-commit-all *git* "updated a.txt"))) + repoid (transact-repo-uri *conn* "test/.git") + _ (doseq [c [c1 c2]] + @(d/transact *conn* + (commit-tx-data (d/db *conn*) *repo* repoid "test" c))) + db (d/db *conn*)] + + (testing "examining first commit" + (is-only "a.txt" + (find-commit-filenames db (:sha c1))) + (is-only "test/a.txt" + (find-commit-filepaths db (:sha c1))) + ;;hardcoded sha corresponds to "line 1\n" + (is-only (:sha c1) + (find-blob-commits db "89b24ecec50c07aef0d6640a2a9f6dc354a33125"))) + + (testing "examining second commit" + (is-only "a.txt" + (find-commit-filenames db (:sha c2))) + (is-only "test/a.txt" + (find-commit-filepaths db (:sha c2))) + ;;hardcoded sha corresponds to "line 1\nline 2\n" + (is-only (:sha c2) + (find-blob-commits db "7bba8c8e64b598d317cdf1bb8a63278f9fc241b1"))) + + (testing "finding commits by file name and path" + (is-coll [(:sha c1) (:sha c2)] + (find-filename-commits db "a.txt")) + (is-coll [(:sha c1) (:sha c2)] + (find-filepath-commits db "test/a.txt")))))) + + +(deftest test-rename-file + (testing "a linear history of two commits, where a file is renamed:" + (let [f1 (io/file *repo-dir* "a.txt") + c1 (do + (spit f1 "line 1\n") + (add-file-to-index *git* "a.txt") + (git/extract-commit-info + (git-commit-all *git* "added a.txt"))) + f2 (io/file *repo-dir* "b.txt") + c2 (do + (io/copy f1 f2) + (io/delete-file f1) + (add-file-to-index *git* "b.txt") + (git/extract-commit-info + (git-commit-all *git* "renamed a.txt to b.txt"))) + repoid (transact-repo-uri *conn* "test/.git") + _ (doseq [c [c1 c2]] + @(d/transact *conn* + (commit-tx-data (d/db *conn*) *repo* repoid "test" c))) + db (d/db *conn*)] + + (testing "examining first commit" + (is-only "a.txt" + (find-commit-filenames db (:sha c1)))) + + (testing "examining second commit" + (is-only "b.txt" + (find-commit-filenames db (:sha c2)))) + + (testing "finding commits by file contents" + (is-coll [(:sha c1) (:sha c2)] + (find-blob-commits db "89b24ecec50c07aef0d6640a2a9f6dc354a33125")))))) + + +(deftest test-merge-commit + (testing "a branch and recursive merge in one file:" + (let [f (io/file *repo-dir* "a.txt") + c1 (do + (spit f "line 1\n") + (add-file-to-index *git* "a.txt") + (git/extract-commit-info + (git-commit-all *git* "added a.txt\n"))) + c2 (do + (checkout-branch *git* "branch" true) + (spit f "line 0\nline 1\n") + (git/extract-commit-info + (git-commit-all *git* "prepended a line to a.txt\n"))) + c3 (do + (checkout-branch *git* "master") + (spit f "line 1\nline 2\n") + (git/extract-commit-info + (git-commit-all *git* "appended a line to a.txt\n"))) + c4 (do + (merge-branch-no-commit *git* "branch") + (git/extract-commit-info + (git-commit-all *git* (str "Merged branch 'branch'\n")))) + repoid (transact-repo-uri *conn* "test/.git") + _ (doseq [c [c1 c2 c3 c4]] + @(d/transact *conn* + (commit-tx-data (d/db *conn*) *repo* repoid "test" c))) + db (d/db *conn*)] + + (testing "examining initial commit on master" + (is-only (:sha c1) + (find-blob-commits db "89b24ecec50c07aef0d6640a2a9f6dc354a33125"))) + + (testing "examining second commit on branch" + (is-only (:sha c1) + (:parents c2)) + (is-only (:sha c2) + (find-blob-commits db "2bbfc232c2e71c62004c15806843df3ffc3688d0"))) + + (testing "examining third commit on master" + (is-only (:sha c1) + (:parents c3)) + (is-only (:sha c3) + (find-blob-commits db "7bba8c8e64b598d317cdf1bb8a63278f9fc241b1"))) + + (testing "examining merge commit" + (is-coll [(:sha c2) (:sha c3)] + (:parents c4)) + (is-only (:sha c4) + (find-blob-commits db "73fc08f0c8b6a87eaad8f5991df3a150b501462d"))) + + (testing "finding commits by file name and path" + (is-coll [(:sha c1) (:sha c2) (:sha c3) (:sha c4)] + (find-filename-commits db "a.txt")) + (is-coll [(:sha c1) (:sha c2) (:sha c3) (:sha c4)] + (find-filepath-commits db "test/a.txt")))))) + + +(deftest test-mix-of-file-ops + (testing "a linear history of five commits, involving add, update, delete, revert:" + (let [f (io/file *repo-dir* "a.txt") + ;;start with initial commit of two files a.txt and b.txt + c1 (do + (spit f "line 1\n") + (add-file-to-index *git* "a.txt") + (spit (io/file *repo-dir* "b.txt") + "contents of b.txt\n") + (add-file-to-index *git* "b.txt") + (git/extract-commit-info + (git-commit-all *git* "added a.txt and b.txt\n"))) + ;;then update a.txt by appending a line, and adding a new + ;;file c.txt + c2 (do + (spit f "line 2\n" :append true) + (spit (io/file *repo-dir* "c.txt") + "contents of c.txt\n") + (add-file-to-index *git* "c.txt") + (git/extract-commit-info + (git-commit-all *git* "updated a.txt and added c.txt\n"))) + ;;then add a new file d.txt + g (io/file *repo-dir* "d.txt") + c3 (do + (spit g "contents of d.txt\n") + (add-file-to-index *git* "d.txt") + (git/extract-commit-info + (git-commit-all *git* "added d.txt\n"))) + ;;then only to delete that file -- this is equivalent to + ;;reverting commit c3 + c4 (do + (io/delete-file g) + (git/extract-commit-info + (git-commit-all *git* "deleted d.txt\n"))) + ;;finally, revert file a.txt to how it was after the + ;;first commit + c5 (do + (spit f "line 1\n") + (git/extract-commit-info + (git-commit-all *git* "revert a.txt\n"))) + repoid (transact-repo-uri *conn* "test/.git") + _ (doseq [c [c1 c2 c3 c4 c5]] + @(d/transact *conn* + (commit-tx-data (d/db *conn*) *repo* repoid "test" c))) + db (d/db *conn*)] + + (testing "examining first commit" + (is-coll ["a.txt" "b.txt"] + (find-commit-filenames db (:sha c1)))) + + (testing "finding commits with blobs containing 'line 1\\n'" + ;;TODO might have expected c5 here? + (is-only (:sha c1) + (find-blob-commits db "89b24ecec50c07aef0d6640a2a9f6dc354a33125"))) + + (testing "examining second commit" + (is-coll ["a.txt" "c.txt"] + (find-commit-filenames db (:sha c2)))) + + (testing "finding commits with blobs containing 'line 1\\nline 2\\n'" + ;;TODO this is because c4 has the same root tree as c2 as + ;;it reverted c3 + (is-coll [(:sha c2) (:sha c4)] + (find-blob-commits db "7bba8c8e64b598d317cdf1bb8a63278f9fc241b1"))) + + (testing "examining third commit" + (is-only "d.txt" + (find-commit-filenames db (:sha c3)))) + + (testing "examining fourth commit" + ;;TODO as said above, c2 and c4 share a root tree + (is-coll ["a.txt" "c.txt"] + (find-commit-filenames db (:sha c4)))) + + (testing "examining fifth commit" + ;;TODO maybe a tad surprising? + (is (empty? (find-commit-filenames db (:sha c5))))) + + (testing "find commits" + ;;TODO might have expected c5 here? + (is-coll [(:sha c1) (:sha c2) (:sha c4)] + (find-filename-commits db "a.txt")))))) + + +(deftest test-path-trickery + (testing "a linear history of four commits, to examine file path behavior:" + (let [;;start with an initial commit of file a.txt in + ;;subdirectory d1 + dir1 (doto (io/file *repo-dir* "d1") (.mkdir)) + f1 (io/file dir1 "a.txt") + c1 (do + (spit f1 "line 1\n") + (add-file-to-index *git* "d1/a.txt") + (git/extract-commit-info + (git-commit-all *git* "added d1/a.txt\n"))) + ;;then add a file with the same name but distinct + ;;contents into subdirectory d2 + dir2 (doto (io/file *repo-dir* "d2") (.mkdir)) + f2 (io/file dir2 "a.txt") + c2 (do + (spit f2 "line 2\n") + (add-file-to-index *git* "d2/a.txt") + (git/extract-commit-info + (git-commit-all *git* "added d2/a.txt\n"))) + ;;then add a file with the same name and contents as the + ;;previous commit but in subdirectory d3 + dir3 (doto (io/file *repo-dir* "d3") (.mkdir)) + f3 (io/file dir3 "a.txt") + c3 (do + (spit f3 "line 2\n") + (add-file-to-index *git* "d3/a.txt") + (git/extract-commit-info + (git-commit-all *git* "added d3/a.txt\n"))) + ;;the repository is now is a state where there are three + ;;files, all of which have the same name, two of which + ;;have the same content, and all are in distinct + ;;subdirectories + ;;finally overwrite the contents of d2/a.txt with the + ;;contents of d1/a.txt + c4 (do + (spit f2 "line 1\n") + (git/extract-commit-info + (git-commit-all *git* "copied d1/a.txt to d2/a.txt\n"))) + ;;before, two files contained 'line 2\\n' + ;;now, two files contain 'line 1\\n' + repoid (transact-repo-uri *conn* "test/.git") + _ (doseq [c [c1 c2 c3 c4]] + @(d/transact *conn* + (commit-tx-data (d/db *conn*) *repo* repoid "test" c))) + db (d/db *conn*)] + + (testing "find commits with file a.txt" + (is-coll [(:sha c1) (:sha c2) (:sha c3) (:sha c4)] + (find-filename-commits db "a.txt"))) + + (testing "find commits with path d1/a.txt" + ;;TODO might have expected just c1? + (is-coll [(:sha c1) (:sha c4)] + (find-filepath-commits db "test/d1/a.txt"))) + + (testing "find commits with path d2/a.txt" + ;;TODO might have expected just c2 and c4? + (is-coll [(:sha c1) (:sha c2) (:sha c3) (:sha c4)] + (find-filepath-commits db "test/d2/a.txt"))) + + (testing "find commits with path d3/a.txt" + ;;TODO might have expected just c3? + (is-coll [(:sha c2) (:sha c3)] + (find-filepath-commits db "test/d3/a.txt"))) + + (testing "find commits for blob with contents 'line 1\\n'" + (is-coll [(:sha c1) (:sha c4)] + (find-blob-commits db "89b24ecec50c07aef0d6640a2a9f6dc354a33125"))) + + (testing "find commits for blob with contents 'line 2\\n'" + (is-coll [(:sha c2) (:sha c3)] + (find-blob-commits db "b7e242c00cdad96cf88a626557eba4deab43b52f")))))) diff --git a/test/datomic/codeq/test_datomic_util.clj b/test/datomic/codeq/test_datomic_util.clj new file mode 100644 index 0000000..8c83287 --- /dev/null +++ b/test/datomic/codeq/test_datomic_util.clj @@ -0,0 +1,188 @@ +(ns datomic.codeq.test-datomic-util + (:use [datomic.codeq.util :only [index-get-id]] + [datomic.codeq.core :only [schema]]) + (:require [datomic.api :as d])) + + +;; Transact Helpers + +(defn transact-repo-uri + "Transact the given repository uri. + Returns the entity id of the transacted fact." + [conn repo-uri] + (let [id (d/tempid :db.part/user) + {:keys [db-after tempids]} + @(d/transact conn [[:db/add id :repo/uri repo-uri]]) + repoid (d/resolve-tempid db-after tempids id)] + repoid)) + + +;; Query Helpers + +(def rules + '[;;find all blobs ?b that are decendants of tree node ?n + [(node-blobs ?n ?b) + [?n :node/object ?b] [?b :git/type :blob]] + [(node-blobs ?n ?b) + [?n :node/object ?t] [?t :git/type :tree] + [?t :tree/nodes ?n2] (node-files ?n2 ?b)] + + ;;find all file names ?name for blobs that are decendants of tree + ;;node ?n + [(node-files ?n ?name) + [?n :node/object ?b] [?b :git/type :blob] + [?n :node/filename ?f] [?f :file/name ?name]] + [(node-files ?n ?name) + [?n :node/object ?t] [?t :git/type :tree] + [?t :tree/nodes ?n2] (node-files ?n2 ?name)] + + ;;find all file paths ?path for blobs that are decendants of tree + ;;node ?n + [(node-paths ?n ?path) + [?n :node/object ?b] [?b :git/type :blob] + [?n :node/paths ?p] [?p :file/name ?path]] + [(node-paths ?n ?path) + [?n :node/object ?t] [?t :git/type :tree] + [?t :tree/nodes ?n2] (node-paths ?n2 ?path)] + + ;;find all tree nodes ?n that reference this object (blob/tree) or + ;;are ancestors of tree nodes that do + [(object-nodes ?o ?n) + [?n :node/object ?o]] + [(object-nodes ?o ?n) + [?n2 :node/object ?o] + [?t :tree/nodes ?n2] (object-nodes ?t ?n)] + + ;;find all tree nodes ?n that have file name ?name or are + ;;ancestors of tree nodes that do + [(file-nodes ?name ?n) + [?f :file/name ?name] [?n :node/filename ?f]] + [(file-nodes ?name ?n) + (file-nodes ?name ?n2) + [?t :tree/nodes ?n2] [?n :node/object ?t]] + + ;;find all tree nodes ?n that have file path ?path or are + ;;ancestors of tree nodes that do + [(path-nodes ?path ?n) + [?p :file/name ?path] [?n :node/paths ?p]] + [(path-nodes ?path ?n) + (path-nodes ?path ?n2) + [?t :tree/nodes ?n2] [?n :node/object ?t]] + + ;;find all blobs ?b that are within commit ?c's tree + [(commit-blobs ?c ?b) + [?c :commit/tree ?root] (node-blobs ?root ?b)] + + ;;find all file names ?name that are within commit ?c's tree + [(commit-files ?c ?name) + [?c :commit/tree ?root] (node-files ?root ?name)] + + ;;find all file paths ?path that are within commit ?c's tree + [(commit-paths ?c ?path) + [?c :commit/tree ?root] (node-paths ?root ?path)] + + ;;find all codeqs ?cq for blobs that are within commit ?c's tree + [(commit-codeqs ?c ?cq) + (commit-blobs ?c ?b) [?cq :codeq/file ?b]] + + ;;find all commits ?c that include blob ?b + [(blob-commits ?b ?c) + (object-nodes ?b ?n) [?c :commit/tree ?n]] + + ;;find all commits ?c that include a file named ?name + [(file-commits ?name ?c) + (file-nodes ?name ?n) [?c :commit/tree ?n]] + + ;;find all commits ?c that include a file path named ?path + [(path-commits ?path ?c) + (path-nodes ?path ?n) [?c :commit/tree ?n]] + + ;;find all commits ?c that include a codeq ?cq + [(codeq-commits ?cq ?c) + [?cq :codeq/file ?b] (blob-commits ?b ?c)]]) + + +(defn find-commit-filenames + "Returns all file names that are part of the given commit." + [db commit-sha] + (mapv first + (d/q '[:find ?name + :in $ % ?sha + :where + [?c :git/sha ?sha] + (commit-files ?c ?name)] + db rules commit-sha))) + +(defn find-commit-filepaths + "Returns all file paths that are part of the given commit." + [db commit-sha] + (mapv first + (d/q '[:find ?path + :in $ % ?sha + :where + [?c :git/sha ?sha] + (commit-paths ?c ?path)] + db rules commit-sha))) + +(defn find-filename-commits + "Returns all commits that reference the given file name." + [db filename] + (mapv first + (d/q '[:find ?sha + :in $ % ?name + :where + (file-commits ?name ?c) + [?c :git/sha ?sha]] + db rules filename))) + +(defn find-filepath-commits + "Returns all commits that reference the given file path." + [db path] + (mapv first + (d/q '[:find ?sha + :in $ % ?path + :where + (path-commits ?path ?c) + [?c :git/sha ?sha]] + db rules path))) + + +(defn find-blob-commits + "Returns all commits that reference the given blob sha." + [db blob-sha] + (let [blob-id (index-get-id db :git/sha blob-sha)] + (mapv first + (d/q '[:find ?sha + :in $ % ?blob-id + :where + (blob-commits ?blob-id ?c) + [?c :git/sha ?sha]] + db rules blob-id)))) + + +;; Entity Helpers + +(defn get-root-tree-node + "Returns the root tree node entity for the given commit sha." + [db commit-sha] + (->> (d/datoms db :avet :git/sha commit-sha) + first :e (d/entity db) :commit/tree)) + +(defn node-name + "Returns the file name of a tree node entity." + [tree-node-entity] + (get-in tree-node-entity [:node/filename :file/name])) + +(defn node-paths + "Returns the file paths of a tree node entity." + [tree-node-entity] + (->> tree-node-entity + :node/paths + (map :file/name))) + +(defn get-tree-node-children + "Returns the children tree nodes of a tree node." + [tree-node-entity] + (let [o (:node/object tree-node-entity)] + (assert (= :tree (:git/type o))) + (:tree/nodes o))) diff --git a/test/datomic/codeq/test_git_util.clj b/test/datomic/codeq/test_git_util.clj new file mode 100644 index 0000000..e010a5d --- /dev/null +++ b/test/datomic/codeq/test_git_util.clj @@ -0,0 +1,87 @@ +(ns datomic.codeq.test-git-util + (:import java.io.File + java.nio.file.Files + java.nio.file.attribute.FileAttribute + org.eclipse.jgit.api.Git + org.eclipse.jgit.lib.PersonIdent)) + + +(defn create-temp-dir + "Returns a java.io.File handle to a fresh temporary directory." + [] + (.toFile (Files/createTempDirectory "codeqtest" (into-array FileAttribute [])))) + + +(defn init-repo + "Returns a GitPorcelain API object and an initialized git + repository for the given directory." + [dir] + (let [git (.. (Git/init) (setDirectory dir) (call))] + [git (.getRepository git)])) + + +(defn create-person-ident + "Returns a PersonIdent from the name and num + with the current timestamp." + [name num] + (let [ident (str name num) + email (str ident "@example.org")] + (PersonIdent. ident email))) + + +(defn author-ident + "Returns a PersonIdent called 'author'." + [& [num]] + (create-person-ident "author" num)) + + +(defn committer-ident + "Returns a Personident called 'committer'." + [& [num]] + (create-person-ident "committer" num)) + + +(defn add-file-to-index + "Add the given file name to the git index." + [git name] + (.. git (add) (addFilepattern name) (call))) + + +(defn git-commit-all + "Perform a git commit. + Equivalent to + git commit -a -m msg" + [git msg] + (.. git (commit) + (setAll true) + (setAuthor (author-ident)) + (setCommitter (committer-ident)) + (setMessage msg) + (call))) + + +(defn checkout-branch + "Perform a git checkout. + git checkout branch-name + If is-new is true then the branch is created. + git checkout -b branch-name" + [git branch-name & [is-new]] + (.. git + (checkout) + (setCreateBranch (true? is-new)) + (setName branch-name) + (call))) + + +(defn merge-branch-no-commit + "Perform a git merge, but stopping before committing. + Equivalent to + git merge --no-commit branch-name" + [git branch-name] + (let [repo (.getRepository git) + branch-id (.resolve repo branch-name)] + (.. git + (merge) + (include branch-id) + (setCommit false) + (call))))