forked from TeamDevDev/debuggingbook
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nbdepend.py
executable file
·209 lines (166 loc) · 7.02 KB
/
nbdepend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/usr/bin/env python
# Issue dependencies for given notebook(s)
"""
usage:
python nbdepend.py A.ipynb B.ipynb C.ipynb > Makefile_deps
"""
import io, os, types, re
from IPython import get_ipython
from IPython.core.interactiveshell import InteractiveShell
import nbformat
import argparse
import textwrap
import warnings
import markdown
from bs4 import BeautifulSoup
from graphviz import Digraph, Source
# Patterns used to find the modules a code cell imports.  Both are applied
# line by line (re.MULTILINE) and tolerate leading spaces, so indented
# imports are found as well.  Group 1 is the first identifier after the
# keyword: `import a.b` yields "a", and `import a, b` yields only "a".
#
# At least one space is required after `import` / `from` (and before the
# trailing `import`), so that identifiers which merely start with these
# keywords -- e.g. `importlib.reload(m)` or `from_import = 1` -- are not
# mistaken for import statements.
RE_IMPORT = re.compile(r"^ *import +([a-zA-Z0-9_]+)", re.MULTILINE)
RE_FROM = re.compile(r"^ *from +([a-zA-Z0-9_]+) +import", re.MULTILINE)
def notebook_dependencies(notebook_name, include_minor_dependencies=True, path=None):
    """Return the set of module names imported by the code cells of a notebook.

    `notebook_name` is the path of the notebook file to read.
    If `include_minor_dependencies` is false, code cells whose transformed
    source contains the marker `# minor` are skipped entirely.
    `path` is accepted but not used.
    """
    with io.open(notebook_name, 'r', encoding='utf-8') as handle:
        nb = nbformat.read(handle, 4)

    # IPython's input transformer turns cell source into plain Python
    transformer = InteractiveShell.instance().input_transformer_manager

    found = set()
    for cell in nb.cells:
        if cell.cell_type != 'code':
            continue

        source = transformer.transform_cell(cell.source)
        if '# minor' in source and not include_minor_dependencies:
            continue

        for pattern in (RE_IMPORT, RE_FROM):
            found.update(hit.group(1) for hit in re.finditer(pattern, source))

    return found
def print_notebook_dependencies(notebooks):
    """Print the modules imported by each notebook in `notebooks`, one per line."""
    for nb_path in notebooks:
        imported = notebook_dependencies(nb_path)
        for name in imported:
            print(name)
def get_title(notebook):
    """Return the title of a notebook file.

    The title is the text of the first Markdown line starting with `# `.
    If there is no such line, a warning is issued and the notebook
    name itself is returned.
    """
    heading = re.search(r'^# (.*)', get_text_contents(notebook), re.MULTILINE)
    if heading is not None:
        # Note: r'\n' is a backslash followed by 'n', not a newline character
        return heading.group(1).replace(r'\n', '')

    warnings.warn(notebook + ": no title")
    return notebook
def get_intro(notebook):
    """Return the first paragraph from a notebook file.

    Heading lines (starting with `#`) at the very beginning of the
    Markdown text are skipped; the result is whatever follows, up to
    (not including) the first blank line.  If there is no blank line,
    the remaining text is returned in full.  A notebook whose text
    consists of a heading only yields the empty string.
    """
    intro = get_text_contents(notebook).strip()

    while intro.startswith('#'):
        # Drop the heading line.  partition() leaves an empty remainder when
        # the heading is the last line, where index('\n') would raise.
        _heading, _newline, intro = intro.partition('\n')

    # Cut at the first blank line -- but only if there is one: find()
    # returns -1 otherwise, and slicing with it would chop the last character.
    end = intro.find('\n\n')
    if end >= 0:
        intro = intro[:end]

    return intro
def markdown_to_text(s):
    """Convert the Markdown string `s` to plain text.

    The Markdown is rendered to HTML, all text nodes of the resulting
    document are concatenated, and surrounding whitespace is stripped.
    """
    html = markdown.markdown(s)
    soup = BeautifulSoup(html, features='lxml')
    # find_all(string=True) is the current spelling of the deprecated
    # findAll(text=True); it selects every text node in the document.
    return "".join(soup.find_all(string=True)).strip()
def format_title(title, width=20):
    """Wrap `title` onto multiple lines for use as a graph node label.

    Lines are filled up to `width` characters (default 20).  Words longer
    than `width` are never broken, so such a line may exceed the limit.
    Returns the wrapped title with lines separated by newlines.
    """
    title = textwrap.fill(title, break_long_words=False, width=width)

    # Do not end a line on a dangling "of": move it to the next line
    title = title.replace(" of\n", "\nof ")

    # Keep the phrase "Failure Origins" together on one line
    title = title.replace("Failure\nOrigins", "\nFailure Origins")
    return title
def get_text_contents(notebook):
    """Return the source of all Markdown cells of a notebook file as one string.

    Cells appear in notebook order; each one is followed by a blank line.
    """
    with io.open(notebook, 'r', encoding='utf-8') as handle:
        parsed = nbformat.read(handle, as_version=4)

    chunks = [
        "".join(cell.source) + "\n\n"
        for cell in parsed.cells
        if cell.cell_type == 'markdown'
    ]
    return "".join(chunks)
def draw_notebook_dependencies(notebooks,
    format='svg', transitive_reduction=True, clusters=True, project='fuzzingbook'):
    """Draw the dependency graph of `notebooks` and print it on standard output.

    An edge A -> B is drawn when notebook B imports a module A whose
    notebook file (`A.ipynb`, in the same directory as B) is itself listed
    in `notebooks`.  Minor dependencies are ignored.

    `notebooks` -- paths of the notebook files to consider.
    `format` -- Graphviz output format; the rendered file is read back as text.
    `transitive_reduction` -- if true, pass the graph through the external
        `tred` command before rendering.
    `clusters` -- if true, a notebook whose title starts with "Part" opens a
        new subgraph cluster, and the notebooks following it are added to it.
    `project` -- 'debuggingbook' selects that project's font and color;
        any other value selects the alternative styling.

    Temporary files named `depend*` are created in the current directory
    and removed again, even if rendering fails.
    """
    dot = Digraph(comment="Notebook dependencies")
    # dot.attr(size='20,30', rank='max')

    if project == 'debuggingbook':
        fontname = 'Raleway, Helvetica, Arial, sans-serif'
        fontcolor = '#6A0DAD'
    else:
        fontname = 'Patua One, Helvetica, sans-serif'
        fontcolor = '#B03A2E'

    node_attrs = {
        'shape': 'note',  # note, plain, none
        'style': 'filled',
        'fontname': fontname,
        'fontcolor': fontcolor,
        'fillcolor': 'white'
    }

    # The cluster currently being filled; None until the first "Part" notebook
    cluster = None

    cluster_attrs = {
        'shape': 'plain',  # note, plain, none
        'style': 'filled',
        'fontname': fontname,
        'fontcolor': 'black',
        'color': '#F0F0F0',
    }

    for notebook_name in notebooks:
        dirname = os.path.dirname(notebook_name)
        basename = os.path.splitext(os.path.basename(notebook_name))[0]
        title = get_title(notebook_name)
        intro = markdown_to_text(get_intro(notebook_name))
        tooltip = f'{title} ({basename})\n\n{intro}'

        if clusters:
            if title.startswith("Part"):
                # A new part begins: emit the previous cluster, if any
                if cluster is not None:
                    cluster.attr(**cluster_attrs)
                    dot.subgraph(cluster)

                cluster = Digraph(name='cluster_' + basename)
                cluster.node(basename, label=format_title(title),
                             URL='%s.ipynb' % basename,
                             tooltip=basename, shape='plain', fontname=fontname)
            elif cluster is not None:
                cluster.node(basename)

        for module in notebook_dependencies(notebook_name,
                                            include_minor_dependencies=False):
            module_file = os.path.join(dirname, module + ".ipynb")
            if module_file in notebooks:
                module_title = get_title(module_file)
                module_intro = markdown_to_text(get_intro(module_file))
                module_tooltip = f'{module_title} ({module})\n\n{module_intro}'
                dot.node(basename, URL='%s.ipynb' % basename,
                         label=format_title(title), tooltip=tooltip, **node_attrs)
                dot.node(module, URL='%s.ipynb' % module,
                         label=format_title(module_title), tooltip=module_tooltip, **node_attrs)
                dot.edge(module, basename)

    # Emit the last cluster
    if cluster is not None:
        cluster.attr(**cluster_attrs)
        dot.subgraph(cluster)

    try:
        if transitive_reduction:
            # Reduce the graph using 'tred'.  If tred fails, '&&' skips the
            # 'mv', and the unreduced depend.gv is used as is.
            dot.format = 'gv'
            dot.save('depend.gv')
            os.system('tred depend.gv > depend.gv~ && mv depend.gv~ depend.gv')
            dot = Source.from_file('depend.gv')

        # Render the graph
        dot.format = format
        dot.render('depend')

        # Print on standard output.  Read as UTF-8 rather than the locale
        # encoding, as titles and tooltips may contain non-ASCII characters.
        with open('depend.' + format, 'r', encoding='utf-8') as file:
            for line in file:
                print(line, end="")
    finally:
        # Clean up, also when one of the steps above failed
        for suffix in ['', '.' + format, '.gv~', '.gv']:
            try:
                os.remove('depend' + suffix)
            except FileNotFoundError:
                pass
if __name__ == "__main__":
    # Command-line entry point: either draw the dependency graph (--graph)
    # or list the imported modules of the given notebooks.
    cli = argparse.ArgumentParser()
    cli.add_argument("--graph", action='store_true', help="Produce graph")
    cli.add_argument("--graph-format", action='store', default='svg',
                     help="Graph format (gv, pdf, svg, ...)")
    cli.add_argument("--project", action='store', help="Project name")
    cli.add_argument("--transitive-reduction", action='store_true',
                     help="Use transitive reduction")
    cli.add_argument("--cluster-by-parts", action='store_true',
                     help="Cluster by parts")
    cli.add_argument("notebooks", nargs='*',
                     help="notebooks to determine dependencies from")
    options = cli.parse_args()

    if not options.graph:
        print_notebook_dependencies(options.notebooks)
    else:
        draw_notebook_dependencies(
            options.notebooks,
            format=options.graph_format,
            transitive_reduction=options.transitive_reduction,
            clusters=options.cluster_by_parts,
            project=options.project,
        )