Merge pull request #21 from Algorithmology/benchmark
feat: Implement benchmarking tool
simojo authored Apr 25, 2024
2 parents 215df0b + 077449a commit f96c291
Showing 11 changed files with 554 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .devenv/gc/shell-2-link
51 changes: 51 additions & 0 deletions bosco/README.md
@@ -0,0 +1,51 @@
# How to Use the Bosco Tool

To run the Bosco tool, call it from the repository's main directory with a Poetry command such as
`poetry run bosco --starting-size 100 --number-doubles 5 --file bosco/sorting.py --function-name bubble_sort`.
Use `--starting-size` to set the initial list size for the doubling experiment,
`--number-doubles` to set how many times the input size is doubled during the experiment,
`--file` to give the path to the file containing the sorting algorithm to benchmark,
and `--function-name` to name the function that implements the sorting algorithm under test.

## Example of command and output

### Command

```terminal
poetry run bosco --starting-size 100 --number-doubles 5 --file bosco/sorting.py --function-name quick_sort
```

### Output

```terminal
🐶 Bosco is fetching our results!
Path to the desired file: bosco/sorting.py
Name of the desired function: quick_sort
Starting size of the data container: 100
Number of doubles to execute: 5
📈 Here are the results from running the experiment!
╒══════════════╤═════════════╤══════════════╤════════════════╕
│ Input Size │ Best Case │ Worst Case │ Average Case │
╞══════════════╪═════════════╪══════════════╪════════════════╡
│ 100 │ 0.00058 │ 0.00061 │ 0.00060 │
├──────────────┼─────────────┼──────────────┼────────────────┤
│ 200 │ 0.00129 │ 0.00155 │ 0.00139 │
├──────────────┼─────────────┼──────────────┼────────────────┤
│ 400 │ 0.00268 │ 0.00374 │ 0.00305 │
├──────────────┼─────────────┼──────────────┼────────────────┤
│ 800 │ 0.00578 │ 0.00656 │ 0.00610 │
├──────────────┼─────────────┼──────────────┼────────────────┤
│ 1600 │ 0.01312 │ 0.01414 │ 0.01372 │
╘══════════════╧═════════════╧══════════════╧════════════════╛
```
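One way to read this table (an illustrative aside, not part of the committed README) is to divide consecutive average-case times: a doubling experiment is judged by how much the running time grows each time the input size doubles.

```python
# Illustrative doubling-ratio calculation using the average-case column above.
averages = [0.00060, 0.00139, 0.00305, 0.00610, 0.01372]
ratios = [later / earlier for earlier, later in zip(averages, averages[1:])]
print([round(r, 2) for r in ratios])  # roughly 2.0 to 2.3, consistent with n log n growth
```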

### Graph Produced from Output

![example_graph](https://github.com/Algorithmology/bosco/assets/70417208/0be0e695-f06c-490a-98df-cb3eaaf5ca07)

Binary file added bosco/__pycache__/__init__.cpython-311.pyc
Binary file added bosco/__pycache__/benchmark.cpython-311.pyc
Binary file added bosco/__pycache__/generate.cpython-311.pyc
Binary file added bosco/__pycache__/main.cpython-311.pyc
Binary file added bosco/__pycache__/sorting.cpython-311.pyc
32 changes: 32 additions & 0 deletions bosco/benchmark.py
@@ -0,0 +1,32 @@
"""Conduct doubling experiments for provided algorithms that perform list sorting."""

import os
import sys
from timeit import repeat
from typing import List, Tuple


def run_sorting_algorithm(
    file_path: str, algorithm: str, array: List[int]
) -> Tuple[float, float, float]:
    """Run a sorting algorithm and profile it with the timeit package."""
    # Make the module that contains the sorting function importable by name.
    directory, file_name = os.path.split(file_path)
    module_name = os.path.splitext(file_name)[0]

    if directory:
        sys.path.append(directory)

    try:
        module = __import__(module_name)
        algorithm_func = getattr(module, algorithm)
    except (ImportError, AttributeError):
        raise ValueError(f"Could not import {algorithm} from {file_path}")

    # Time the call on the literal input list: 3 repetitions of 10 runs each.
    stmt = f"{algorithm_func.__name__}({array})"
    times = repeat(
        setup=f"from {module_name} import {algorithm}",
        stmt=stmt,
        repeat=3,
        number=10,
    )
    return min(times), max(times), sum(times) / len(times)
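The helper above is also importable outside the CLI; the following sketch (not part of this commit) shows one direct call, assuming `bosco/sorting.py` provides a `bubble_sort` function as the README's default options suggest.

```python
# Hypothetical direct use of the benchmarking helper (not included in this commit).
from bosco import benchmark, generate

data = generate.generate_random_container(500)
best, worst, average = benchmark.run_sorting_algorithm(
    "bosco/sorting.py", "bubble_sort", data  # bubble_sort is assumed to exist in sorting.py
)
print(f"best={best:.5f}s worst={worst:.5f}s average={average:.5f}s")
```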
12 changes: 12 additions & 0 deletions bosco/generate.py
@@ -0,0 +1,12 @@
"""Generate random container."""

import random
from typing import List


def generate_random_container(
    size: int,
) -> List[int]:
    """Generate a random list defined by the size."""
    random_list = [random.randint(1, size * size) for _ in range(size)]
    return random_list
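As a quick, hypothetical check (not part of this commit), the generator's contract is a list of the requested length whose values are drawn from 1 to the square of the size.

```python
# Hypothetical check of generate_random_container's contract (not in this commit).
from bosco.generate import generate_random_container

values = generate_random_container(100)
assert len(values) == 100                        # one entry per requested element
assert all(1 <= v <= 100 * 100 for v in values)  # bounds follow randint(1, size * size)
```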
99 changes: 99 additions & 0 deletions bosco/main.py
@@ -1 +1,100 @@
"""Bosco runs benchmarks to assess the performance of Python functions."""

import plotly.graph_objs as go
import typer
from plotly.subplots import make_subplots
from rich.console import Console
from tabulate import tabulate

from bosco import benchmark, generate

cli = typer.Typer()
console = Console()


@cli.command()
def bosco(
    starting_size: int = typer.Option(100),
    number_doubles: int = typer.Option(5),
    file: str = typer.Option("./bosco/sorting.py"),
    function_name: str = typer.Option("bubble_sort"),
) -> None:
    """Conduct a doubling experiment to measure the performance of list sorting for a specific algorithm."""
    console.print("\n:dog: Bosco is fetching our results!\n")
    console.print(f"Path to the desired file: {file}\n")
    console.print(f"Name of the desired function: {function_name}\n")
    console.print(f"Starting size of the data container: {starting_size}\n")
    console.print(f"Number of doubles to execute: {number_doubles}\n")
    console.print("📈 Here are the results from running the experiment!\n")

    all_results = []

    for i in range(number_doubles):
        size = starting_size * (2**i)
        data_to_sort = generate.generate_random_container(size)
        performance_data = benchmark.run_sorting_algorithm(
            file, function_name, data_to_sort
        )

        best_time, worst_time, average_time = performance_data

        all_results.append([best_time, worst_time, average_time])

    header = ["Input Size", "Best Case", "Worst Case", "Average Case"]
    data = [
        [starting_size * 2**i, *results]
        for i, results in enumerate(all_results)
    ]

    table = tabulate(
        data, headers=header, tablefmt="fancy_grid", floatfmt=".5f"
    )
    console.print(table)

    # plot
    fig = make_subplots(rows=1, cols=1)

    x_values = [starting_size * (2**i) for i in range(number_doubles)]
    best_case = [results[0] for results in all_results]
    worst_case = [results[1] for results in all_results]
    average_case = [results[2] for results in all_results]

    trace_best = go.Scatter(
        x=x_values,
        y=best_case,
        mode="lines+markers",
        name="Best Case",
    )
    trace_worst = go.Scatter(
        x=x_values,
        y=worst_case,
        mode="lines+markers",
        name="Worst Case",
    )
    trace_average = go.Scatter(
        x=x_values,
        y=average_case,
        mode="lines+markers",
        name="Average Case",
    )

    fig.add_trace(trace_best)
    fig.add_trace(trace_worst)
    fig.add_trace(trace_average)

    fig.update_layout(
        title=f"Evaluating the Performance of {function_name}",
        xaxis_title="Input Size",
        yaxis_title="Execution Time (s)",
        showlegend=True,
        margin=dict(l=20, r=20, t=60, b=20),
        title_x=0.5,
    )

    fig.show()
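Because the command is a Typer app, it can also be exercised in-process; the sketch below (not part of this commit) drives it with Typer's test runner, assuming it is run from the repository root and that Plotly can open the final figure in a browser.

```python
# Hypothetical in-process smoke test for the CLI (not included in this commit).
from typer.testing import CliRunner

from bosco.main import cli

runner = CliRunner()
result = runner.invoke(cli, ["--starting-size", "50", "--number-doubles", "3"])
assert result.exit_code == 0  # note: the command ends by opening a Plotly figure
print(result.stdout)
```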
