-
Notifications
You must be signed in to change notification settings - Fork 23
/
html2org.txt
113 lines (91 loc) · 5.61 KB
/
html2org.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#+begin_src emacs-lisp
;; works!!
(defun html2org-clipboard ()
  "Insert the clipboard's HTML at point, converted to Org markup.

The HTML flavor of the clipboard is read with osascript, hex-decoded
with perl and converted by pandoc (HTML to JSON to Org).  The result
is then cleaned up, in this order:

- inline data:image links are removed;
- lines starting with a link to chat.openai.com or
  lh3.googleusercontent are removed;
- runs of four or more newlines are collapsed to one blank line;
- no-break spaces become ordinary spaces;
- pairs of backslashes are removed;
- :PROPERTIES: ... :END: drawers are removed;
- =verbatim= markup is rewritten as ~code~ markup.

The cleaned text is pushed onto the kill ring and yanked at point."
  (interactive)
  ;; Bind locally: the original `setq' created/overwrote the global
  ;; variables `cmd' and `org-content' on every call.
  (let* ((cmd "osascript -e 'the clipboard as \"HTML\"' | perl -ne 'print chr foreach unpack(\"C*\",pack(\"H*\",substr($_,11,-3)))' | pandoc -f html -t json | pandoc -f json -t org")
         (org-content (shell-command-to-string cmd)))
    ;; Drop embedded images and known tracking/avatar link lines.
    (setq org-content (replace-regexp-in-string "\\[\\[data:image[^]]*\\]\\]" "" org-content :fixedcase :literal))
    (setq org-content (replace-regexp-in-string "^\\[\\[https://chat.openai.com.*$" "" org-content))
    (setq org-content (replace-regexp-in-string "^\\[\\[https://lh3.googleusercontent.*$" "" org-content))
    ;; Collapse large vertical gaps.  The pattern must contain real
    ;; newline characters: in an Emacs regexp a backslash followed by
    ;; `n' matches the letter n, so the old "\\n\\n..." patterns never
    ;; matched a blank line (and their "\\n" replacement is rejected by
    ;; `replace-match').  One rule covers the old 7- and 4-newline rules.
    (setq org-content (replace-regexp-in-string "\n\\{4,\\}" "\n\n" org-content))
    ;; NOTE(review): the pattern on this line was an invisible character
    ;; in the original source.  It is assumed to be U+00A0 NO-BREAK
    ;; SPACE -- confirm.  It is spelled as an escape so it cannot
    ;; degrade into an empty regexp, which would insert a space before
    ;; every character.
    (setq org-content (replace-regexp-in-string "\u00A0" " " org-content))
    ;; Remove literal double backslashes.
    (setq org-content (replace-regexp-in-string "\\\\\\\\" "" org-content))
    ;; Remove property drawers: first the line-oriented form, then
    ;; anything left between :PROPERTIES: and :END:.
    (setq org-content (replace-regexp-in-string ":PROPERTIES:\n\\(.*\n\\)*?:END:" "" org-content))
    (setq org-content (replace-regexp-in-string ":PROPERTIES:\\([^\000]*?\\):END:" "" org-content)) ;; somehow leaves stray square brackets in the output
    ;; Replace "=" enclosed text with "~" enclosed text.
    (setq org-content (replace-regexp-in-string "\\(\\W\\|=\\|^\\)=\\([^=]*\\)=\\(\\W\\|=\\|$\\)" "\\1~\\2~\\3" org-content))
    (kill-new org-content)
    (yank)
    ;; (org-mode-restart)
    ))
;; works!!
(defun html2org-clipboard-and-unfill-region ()
  "Insert the clipboard's HTML at point as unfilled Org text.

The HTML flavor of the clipboard is read with osascript, hex-decoded
with perl and converted by pandoc (HTML to JSON to Org).  Lines that
start with a data:image, chat.openai.com or lh3.googleusercontent
link are removed, runs of seven or more newlines are collapsed to one
blank line, and pairs of backslashes are removed.  The text is then
passed through `unfill-paragraph' in a temporary buffer, pushed onto
the kill ring and yanked at point."
  (interactive)
  ;; Bind locally: the original `setq' created/overwrote the global
  ;; variables `cmd' and `org-content' on every call.
  (let* ((cmd "osascript -e 'the clipboard as \"HTML\"' | perl -ne 'print chr foreach unpack(\"C*\",pack(\"H*\",substr($_,11,-3)))' | pandoc -f html -t json | pandoc -f json -t org")
         (org-content (shell-command-to-string cmd)))
    (setq org-content (replace-regexp-in-string "^\\[\\[data:image.*$" "" org-content))
    (setq org-content (replace-regexp-in-string "^\\[\\[https://chat.openai.com.*$" "" org-content))
    (setq org-content (replace-regexp-in-string "^\\[\\[https://lh3.googleusercontent.*$" "" org-content))
    ;; The pattern must contain real newline characters: in an Emacs
    ;; regexp a backslash followed by `n' matches the letter n, so the
    ;; old "\\n\\n..." pattern never matched a blank line.
    (setq org-content (replace-regexp-in-string "\n\\{7,\\}" "\n\n" org-content))
    ;; Remove literal double backslashes.
    (setq org-content (replace-regexp-in-string "\\\\\\\\" "" org-content))
    (with-temp-buffer
      (insert org-content)
      ;; NOTE(review): `unfill-paragraph' is not built in and is not
      ;; defined in this file; it is called here with a region, so the
      ;; local definition must accept two arguments -- confirm.
      (unfill-paragraph (point-min) (point-max))
      (setq org-content (buffer-string)))
    (kill-new org-content)
    (yank)))
(defun html2org-clipboard-and-unfill-region-clobber-blockquotes ()
  "Insert the clipboard's HTML at point as unfilled Org text, quotes included.

The HTML flavor of the clipboard is read with osascript, hex-decoded
with perl and converted by pandoc (HTML to JSON to Org).  Lines that
start with a data:image, chat.openai.com or lh3.googleusercontent
link are removed, runs of seven or more newlines are collapsed to one
blank line, and pairs of backslashes are removed.  The whole text,
quote blocks included, is passed through `unfill-region' in a
temporary buffer; afterwards a newline is inserted before every
#+end_quote so the closing delimiter starts its own line again.  The
result is pushed onto the kill ring and yanked at point."
  (interactive)
  ;; Bind locally: the original `setq' created/overwrote the global
  ;; variables `cmd' and `org-content' on every call.
  (let* ((cmd "osascript -e 'the clipboard as \"HTML\"' | perl -ne 'print chr foreach unpack(\"C*\",pack(\"H*\",substr($_,11,-3)))' | pandoc -f html -t json | pandoc -f json -t org")
         (org-content (shell-command-to-string cmd)))
    (setq org-content (replace-regexp-in-string "^\\[\\[data:image.*$" "" org-content))
    (setq org-content (replace-regexp-in-string "^\\[\\[https://chat.openai.com.*$" "" org-content))
    (setq org-content (replace-regexp-in-string "^\\[\\[https://lh3.googleusercontent.*$" "" org-content))
    ;; The pattern must contain real newline characters: in an Emacs
    ;; regexp a backslash followed by `n' matches the letter n, so the
    ;; old "\\n\\n..." pattern never matched a blank line.
    (setq org-content (replace-regexp-in-string "\n\\{7,\\}" "\n\n" org-content))
    ;; Remove literal double backslashes.
    (setq org-content (replace-regexp-in-string "\\\\\\\\" "" org-content))
    (with-temp-buffer
      (insert org-content)
      ;; NOTE(review): `unfill-region' is not built in and is not
      ;; defined in this file (presumably from the unfill package).
      (unfill-region (point-min) (point-max))
      (setq org-content (buffer-string)))
    ;; Put each closing delimiter back on a line of its own.  The
    ;; replacement uses \& (the matched text) with FIXEDCASE: the old
    ;; replacement contained a backslash before "+", which
    ;; `replace-match' rejects with "Invalid use of \ in replacement
    ;; text" as soon as a quote block is present.
    (setq org-content (replace-regexp-in-string "#\\+end_quote" "\n\\&" org-content t))
    (kill-new org-content)
    (yank)))
;; Unfills everything except the contents of #+begin_quote blocks.
(defun html2org--unfill-outside-quotes (text)
  "Return TEXT with paragraphs outside quote blocks joined onto one line.
Everything from a #+begin_quote line through the matching #+end_quote
line is left exactly as it is.  An unterminated quote block extends to
the end of TEXT."
  (with-temp-buffer
    (insert text)
    (goto-char (point-min))
    (let ((fill-column most-positive-fixnum) ; filling this wide joins lines
          (case-fold-search t)               ; also match #+BEGIN_QUOTE
          ;; Markers, because filling the text before a quote may move it.
          (quote-beg (make-marker))
          (quote-end (make-marker)))
      (while (not (eobp))
        (let ((start (point)))
          (if (not (re-search-forward "^[ \t]*#\\+begin_quote" nil t))
              ;; No further quote block: unfill the rest and stop.
              (progn
                (fill-region start (point-max))
                (goto-char (point-max)))
            (set-marker quote-beg (match-beginning 0))
            ;; Resume on the line after the closing delimiter.
            (if (re-search-forward "^[ \t]*#\\+end_quote" nil t)
                (forward-line 1)
              (goto-char (point-max)))
            (set-marker quote-end (point))
            (fill-region start quote-beg)
            (goto-char quote-end))))
      (set-marker quote-beg nil)
      (set-marker quote-end nil))
    (buffer-string)))

(defun html2org-clipboard-and-unfill-region-but-preserve-blockquotes ()
  "Insert the clipboard's HTML at point as unfilled Org text, keeping quotes.

The HTML flavor of the clipboard is read with osascript, hex-decoded
with perl and converted by pandoc (HTML to JSON to Org).  Lines that
start with a data:image, chat.openai.com or lh3.googleusercontent
link are removed, runs of seven or more newlines are collapsed to one
blank line, and pairs of backslashes are removed.  Paragraphs outside
#+begin_quote ... #+end_quote blocks are then joined onto single
lines while the quote blocks keep their line structure.  The result
is pushed onto the kill ring and yanked at point."
  (interactive)
  ;; Bind locally: the original `setq' created/overwrote the global
  ;; variables `cmd' and `org-content' on every call.
  (let* ((cmd "osascript -e 'the clipboard as \"HTML\"' | perl -ne 'print chr foreach unpack(\"C*\",pack(\"H*\",substr($_,11,-3)))' | pandoc -f html -t json | pandoc -f json -t org")
         (org-content (shell-command-to-string cmd)))
    (setq org-content (replace-regexp-in-string "^\\[\\[data:image.*$" "" org-content))
    (setq org-content (replace-regexp-in-string "^\\[\\[https://chat.openai.com.*$" "" org-content))
    (setq org-content (replace-regexp-in-string "^\\[\\[https://lh3.googleusercontent.*$" "" org-content))
    ;; The pattern must contain real newline characters: in an Emacs
    ;; regexp a backslash followed by `n' matches the letter n, so the
    ;; old "\\n\\n..." pattern never matched a blank line.
    (setq org-content (replace-regexp-in-string "\n\\{7,\\}" "\n\n" org-content))
    ;; Remove literal double backslashes.
    (setq org-content (replace-regexp-in-string "\\\\\\\\" "" org-content))
    ;; The previous loop re-filled paragraphs at the default
    ;; `fill-column' (the opposite of unfilling) and only skipped the
    ;; delimiter lines, not the quoted text between them.
    (setq org-content (html2org--unfill-outside-quotes org-content))
    (kill-new org-content)
    (yank)))
(defun html2org-clipboard-with-questions ()
  "Insert the clipboard's HTML at point as Org text, with questions as headings.

The HTML flavor of the clipboard is read with osascript, hex-decoded
with perl and converted by pandoc (HTML to JSON to Org).  Lines that
start with a data:image or chat.openai.com link are removed and runs
of seven or more newlines are collapsed to one blank line.  Every
line that ends with a question mark is then prefixed with \"* \",
turning it into a top-level Org heading.  The result is pushed onto
the kill ring and yanked at point."
  (interactive)
  ;; Bind locally: the original `setq' created/overwrote the global
  ;; variables `cmd' and `org-content' on every call.
  (let* ((cmd "osascript -e 'the clipboard as \"HTML\"' | perl -ne 'print chr foreach unpack(\"C*\",pack(\"H*\",substr($_,11,-3)))' | pandoc -f html -t json | pandoc -f json -t org")
         (org-content (shell-command-to-string cmd)))
    (setq org-content (replace-regexp-in-string "^\\[\\[data:image.*$" "" org-content))
    (setq org-content (replace-regexp-in-string "^\\[\\[https://chat.openai.com.*$" "" org-content))
    ;; The pattern must contain real newline characters: in an Emacs
    ;; regexp a backslash followed by `n' matches the letter n, so the
    ;; old "\\n\\n..." pattern never matched a blank line.
    (setq org-content (replace-regexp-in-string "\n\\{7,\\}" "\n\n" org-content))
    ;; Promote each line ending in "?" to a heading.
    (setq org-content (replace-regexp-in-string "^\\(.*\\?\\)$" "* \\1" org-content))
    (kill-new org-content)
    (yank)))
#+end_src