diff --git a/.devenv/gc/shell-2-link b/.devenv/gc/shell-2-link index 9e3a17b..cb146e0 120000 --- a/.devenv/gc/shell-2-link +++ b/.devenv/gc/shell-2-link @@ -1 +1,2 @@ -/nix/store/3dad29jif1y2w72bly039pfs75ndy5y6-devenv-shell-env \ No newline at end of file + +/nix/store/kfc3lrq3q6kgp12ic8vhmskgdyah6msf-devenv-shell-env diff --git a/bosco/README.md b/bosco/README.md new file mode 100644 index 0000000..47e8ffc --- /dev/null +++ b/bosco/README.md @@ -0,0 +1,51 @@ +# How to Use the Bosco Tool + +To run the Bosco tool, you can call it from the main directory using a poetry command formatted like +`poetry run bosco --starting-size 100 --number-doubles 5 --file bosco/sorting.py --function-name bubble_sort`. +The command should use the inputs `--starting-size` for the initial list size to start the doubling experiment, +`--number-doubles` for the number of times the input size will be doubled during the doubling experiment, +`--file` for the path to the file containing the sorting algorithm you want to run the doubling experiment on, +and `--function-name` for the name of the function containing the sorting algorithm you want to test. + +## Example of command and output + +### Command + +```terminal +poetry run bosco --starting-size 100 --number-doubles 5 --file bosco/sorting.py --function-name quick_sort +``` + +### Output + +```terminal +🐶 Bosco is fetching our results! + +Path to the desired file: bosco/sorting.py + +Name of the desired function: quick_sort + +Starting size of the data container: 100 + +Number of doubles to execute: 5 + +📈 Here are the results from running the experiment! 
+ +╒══════════════╤═════════════╤══════════════╤════════════════╕ +│ Input Size │ Best Case │ Worst Case │ Average Case │ +╞══════════════╪═════════════╪══════════════╪════════════════╡ +│ 100 │ 0.00058 │ 0.00061 │ 0.00060 │ +├──────────────┼─────────────┼──────────────┼────────────────┤ +│ 200 │ 0.00129 │ 0.00155 │ 0.00139 │ +├──────────────┼─────────────┼──────────────┼────────────────┤ +│ 400 │ 0.00268 │ 0.00374 │ 0.00305 │ +├──────────────┼─────────────┼──────────────┼────────────────┤ +│ 800 │ 0.00578 │ 0.00656 │ 0.00610 │ +├──────────────┼─────────────┼──────────────┼────────────────┤ +│ 1600 │ 0.01312 │ 0.01414 │ 0.01372 │ +╘══════════════╧═════════════╧══════════════╧════════════════╛ +``` + +### Graph Produced from Output + +![example_graph](https://github.com/Algorithmology/bosco/assets/70417208/0be0e695-f06c-490a-98df-cb3eaaf5ca07) + diff --git a/bosco/__pycache__/__init__.cpython-311.pyc b/bosco/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..fffd47f Binary files /dev/null and 
b/bosco/__pycache__/__init__.cpython-311.pyc differ diff --git a/bosco/__pycache__/benchmark.cpython-311.pyc b/bosco/__pycache__/benchmark.cpython-311.pyc new file mode 100644 index 0000000..0bac433 Binary files /dev/null and b/bosco/__pycache__/benchmark.cpython-311.pyc differ diff --git a/bosco/__pycache__/generate.cpython-311.pyc b/bosco/__pycache__/generate.cpython-311.pyc new file mode 100644 index 0000000..13d5b48 Binary files /dev/null and b/bosco/__pycache__/generate.cpython-311.pyc differ diff --git a/bosco/__pycache__/main.cpython-311.pyc b/bosco/__pycache__/main.cpython-311.pyc new file mode 100644 index 0000000..74c1ed5 Binary files /dev/null and b/bosco/__pycache__/main.cpython-311.pyc differ diff --git a/bosco/__pycache__/sorting.cpython-311.pyc b/bosco/__pycache__/sorting.cpython-311.pyc new file mode 100644 index 0000000..be89bea Binary files /dev/null and b/bosco/__pycache__/sorting.cpython-311.pyc differ diff --git a/bosco/benchmark.py b/bosco/benchmark.py new file mode 100644 index 0000000..63547e0 --- /dev/null +++ b/bosco/benchmark.py @@ -0,0 +1,32 @@ +"""Conduct doubling experiments for provided algorithms that perform list sorting.""" + +import os +import sys +from timeit import repeat +from typing import List, Tuple + + +def run_sorting_algorithm( + file_path: str, algorithm: str, array: List[int] +) -> Tuple[float, float, float]: + """Run a sorting algorithm and profile it with the timeit package.""" + directory, file_name = os.path.split(file_path) + module_name = os.path.splitext(file_name)[0] + + if directory: + sys.path.append(directory) + + try: + module = __import__(module_name) + algorithm_func = getattr(module, algorithm) + except (ImportError, AttributeError): + raise ValueError(f"Could not import {algorithm} from {file_path}") + + stmt = f"{algorithm_func.__name__}({array})" + times = repeat( + setup=f"from {module_name} import {algorithm}", + stmt=stmt, + repeat=3, + number=10, + ) + return min(times), max(times), sum(times) / 
len(times) diff --git a/bosco/generate.py b/bosco/generate.py new file mode 100644 index 0000000..e52522e --- /dev/null +++ b/bosco/generate.py @@ -0,0 +1,12 @@ +"""Generate random container.""" + +import random +from typing import List + + +def generate_random_container( + size: int, +) -> List[int]: + """Generate a random list defined by the size.""" + random_list = [random.randint(1, size * size) for _ in range(size)] + return random_list \ No newline at end of file diff --git a/bosco/main.py b/bosco/main.py index 654869d..ed24d8e 100644 --- a/bosco/main.py +++ b/bosco/main.py @@ -1 +1,100 @@ """Bosco runs benchmarks to assess the performance of Python functions.""" + +import plotly.graph_objs as go +import typer +from plotly.subplots import make_subplots +from rich.console import Console +from tabulate import tabulate + +from bosco import benchmark, generate + +cli = typer.Typer() +console = Console() + + +@cli.command() +def bosco( + starting_size: int = typer.Option(100), + number_doubles: int = typer.Option(5), + file: str = typer.Option("./bosco/sorting.py"), + function_name: str = typer.Option("bubble_sort"), +) -> None: + """Conduct a doubling experiment to measure the performance of list sorting for a specific algorithm.""" + console.print( + "\n:dog: Bosco is fetching our results!\n" + ) + console.print(f"Path to the desired file: {file}\n") + console.print(f"Name of the desired function: {function_name}\n") + console.print(f"Starting size of the data container: {starting_size}\n") + console.print(f"Number of doubles to execute: {number_doubles}\n") + console.print("๐Ÿ“ˆ Here are the results from running the experiment!\n") + + all_results = [] + + for i in range(number_doubles): + size = starting_size * (2**i) + data_to_sort = generate.generate_random_container(size) + performance_data = benchmark.run_sorting_algorithm( + file, function_name, data_to_sort + ) + + ( + best_time, + worst_time, + average_time, + ) = performance_data # best, worst, and 
average times + + all_results.append([best_time, worst_time, average_time]) + + header = ["Input Size", "Best Case", "Worst Case", "Average Case"] + data = [ + [starting_size * 2**i, *results] + for i, results in enumerate(all_results) + ] + + table = tabulate( + data, headers=header, tablefmt="fancy_grid", floatfmt=".5f" + ) + console.print(table) + + # plot + fig = make_subplots(rows=1, cols=1) + + x_values = [starting_size * (2**i) for i in range(number_doubles)] + best_case = [results[0] for results in all_results] + worst_case = [results[1] for results in all_results] + average_case = [results[2] for results in all_results] + + trace_best = go.Scatter( + x=x_values, + y=best_case, + mode="lines+markers", + name="Best Case", + ) + trace_worst = go.Scatter( + x=x_values, + y=worst_case, + mode="lines+markers", + name="Worst Case", + ) + trace_average = go.Scatter( + x=x_values, + y=average_case, + mode="lines+markers", + name="Average Case", + ) + + fig.add_trace(trace_best) + fig.add_trace(trace_worst) + fig.add_trace(trace_average) + + fig.update_layout( + title=f"Evaluating the Performance of {function_name}", + xaxis_title="Input Size", + yaxis_title="Execution Time (s)", + showlegend=True, + margin=dict(l=20, r=20, t=60, b=20), + title_x=0.5, + ) + + fig.show() diff --git a/bosco/sorting.py b/bosco/sorting.py new file mode 100644 index 0000000..294d635 --- /dev/null +++ b/bosco/sorting.py @@ -0,0 +1,358 @@ +"""Sorting algorithms for lists that contain integer values.""" + +from random import randint +from typing import List + + +def bubble_sort(array: List[int]) -> List[int]: + """Sort an input list called array using bubble sort.""" + n = len(array) + + for i in range(n): + # Create a flag that will allow the function to + # terminate early if there's nothing left to sort + already_sorted = True + + # Start looking at each item of the list one by one, + # comparing it with its adjacent value. 
With each + # iteration, the portion of the array that you look at + # shrinks because the remaining items have already been + # sorted. + for j in range(n - i - 1): + if array[j] > array[j + 1]: + # If the item you're looking at is greater than its + # adjacent value, then swap them + array[j], array[j + 1] = array[j + 1], array[j] + + # Since you had to swap two elements, + # set the `already_sorted` flag to `False` so the + # algorithm doesn't finish prematurely + already_sorted = False + + # If there were no swaps during the last iteration, + # the array is already sorted, and you can terminate + if already_sorted: + break + + return array + + +def insertion_sort(array: List[int]) -> List[int]: + """Run an insertion sort on the provided array.""" + # Loop from the second element of the array until + # the last element + for i in range(1, len(array)): + # This is the element we want to position in its + # correct place + key_item = array[i] + + # Initialize the variable that will be used to + # find the correct position of the element referenced + # by `key_item` + j = i - 1 + + # Run through the list of items (the left + # portion of the array) and find the correct position + # of the element referenced by `key_item`. Do this only + # if `key_item` is smaller than its adjacent values. 
+ while j >= 0 and array[j] > key_item: + # Shift the value one position to the left + # and reposition j to point to the next element + # (from right to left) + array[j + 1] = array[j] + j -= 1 + + # When you finish shifting the elements, you can position + # `key_item` in its correct location + array[j + 1] = key_item + + return array + + +def merge(left: List[int], right: List[int]) -> List[int]: + """Define a convenience method that supports the merging of lists.""" + # If the first array is empty, then nothing needs + # to be merged, and you can return the second array as the result + if len(left) == 0: + return right + + # If the second array is empty, then nothing needs + # to be merged, and you can return the first array as the result + if len(right) == 0: + return left + + result: List[int] = [] + index_left = index_right = 0 + + # Now go through both arrays until all the elements + # make it into the resultant array + while len(result) < len(left) + len(right): + # The elements need to be sorted to add them to the + # resultant array, so you need to decide whether to get + # the next element from the first or the second array + if left[index_left] <= right[index_right]: + result.append(left[index_left]) + index_left += 1 + else: + result.append(right[index_right]) + index_right += 1 + + # If you reach the end of either array, then you can + # add the remaining elements from the other array to + # the result and break the loop + if index_right == len(right): + result += left[index_left:] + break + + if index_left == len(left): + result += right[index_right:] + break + + return result + + +def merge_sort(array: List[int]) -> List[int]: + """Sort the provided list called array with the merge sort algorithm.""" + # If the input array contains fewer than two elements, + # then return it as the result of the function + if len(array) < 2: + return array + + midpoint = len(array) // 2 + + # Sort the array by recursively splitting the input + # into two equal 
halves, sorting each half and merging them + # together into the final result + return merge( + left=merge_sort(array[:midpoint]), right=merge_sort(array[midpoint:]) + ) + + +def quick_sort(array: List[int]) -> List[int]: + """Sort the provided list called array with the quick sort algorithm.""" + # If the input array contains fewer than two elements, + # then return it as the result of the function + if len(array) < 2: + return array + + low, same, high = [], [], [] + + # Select your `pivot` element randomly + pivot = array[randint(0, len(array) - 1)] + + for item in array: + # Elements that are smaller than the `pivot` go to + # the `low` list. Elements that are larger than + # `pivot` go to the `high` list. Elements that are + # equal to `pivot` go to the `same` list. + if item < pivot: + low.append(item) + elif item == pivot: + same.append(item) + elif item > pivot: + high.append(item) + + # The final result combines the sorted `low` list + # with the `same` list and the sorted `high` list + return quick_sort(low) + same + quick_sort(high) + + +def insertion_sort_tim(array: List[int], left: int = 0, right=None): + """Use an internal sorting algorithm for the timsort algorithm.""" + if right is None: + right = len(array) - 1 + + # Loop from the element indicated by + # `left` until the element indicated by `right` + for i in range(left + 1, right + 1): + # This is the element we want to position in its + # correct place + key_item = array[i] + + # Initialize the variable that will be used to + # find the correct position of the element referenced + # by `key_item` + j = i - 1 + + # Run through the list of items (the left + # portion of the array) and find the correct position + # of the element referenced by `key_item`. Do this only + # if the `key_item` is smaller than its adjacent values. 
+ while j >= left and array[j] > key_item: + # Shift the value one position to the left + # and reposition `j` to point to the next element + # (from right to left) + array[j + 1] = array[j] + j -= 1 + + # When you finish shifting the elements, position + # the `key_item` in its correct location + array[j + 1] = key_item + + return array + + +def tim_sort(array: List[int]) -> List[int]: + """Sort the list called array with the tim sort algorithm using a special insertion sort.""" + min_run = 32 + n = len(array) + + # Start by slicing and sorting small portions of the + # input array. The size of these slices is defined by + # your `min_run` size. + for i in range(0, n, min_run): + insertion_sort_tim(array, i, min((i + min_run - 1), n - 1)) + + # Now you can start merging the sorted slices. + # Start from `min_run`, doubling the size on + # each iteration until you surpass the length of + # the array. + size = min_run + while size < n: + # Determine the arrays that will + # be merged together + for start in range(0, n, size * 2): + # Compute the `midpoint` (where the first array ends + # and the second starts) and the `endpoint` (where + # the second array ends) + midpoint = start + size - 1 + end = min((start + size * 2 - 1), (n - 1)) + + # Merge the two subarrays. + # The `left` array should go from `start` to + # `midpoint + 1`, while the `right` array should + # go from `midpoint + 1` to `end + 1`. 
+ merged_array = merge( + left=array[start : midpoint + 1], + right=array[midpoint + 1 : end + 1], + ) + + # Finally, put the merged array back into + # your array + array[start : start + len(merged_array)] = merged_array + + # Each iteration should double the size of your arrays + size *= 2 + + return array + + +# Selection Sort algorithm in Python +def selection_sort(array): + for s in range(len(array)): + min_idx = s + + for i in range(s + 1, len(array)): + # For sorting in descending order + # for minimum element in each loop + if array[i] < array[min_idx]: + min_idx = i + + # Arranging min at the correct position + (array[s], array[min_idx]) = (array[min_idx], array[s]) + + +def heapify(array, n, i): + largest = i + l = 2 * i + 1 + r = 2 * i + 2 + + if l < n and array[i] < array[l]: + largest = l + if r < n and array[largest] < array[r]: + largest = r + + if largest != i: + array[i], array[largest] = array[largest], array[i] + heapify(array, n, largest) + + +def heap_sort(array): + n = len(array) + for i in range(n // 2, -1, -1): + heapify(array, n, i) + for i in range(n - 1, 0, -1): + array[i], array[0] = array[0], array[i] + heapify(array, i, 0) + return array + + +def shell_sort(array): + n = len(array) + interval = n // 2 + while interval > 0: + for i in range(interval, n): + temp = array[i] + j = i + while j >= interval and array[j - interval] > temp: + array[j] = array[j - interval] + j -= interval + + array[j] = temp + interval //= 2 + return array + + +# Radix sort in Python + + +# Using counting sort to sort the elements in the basis of significant places +def countingSort_radix(array, place): + size = len(array) + output = [0] * size + count = [0] * 10 + + # Calculate count of elements + for i in range(0, size): + index = array[i] // place + count[index % 10] += 1 + + # Calculate cumulative count + for i in range(1, 10): + count[i] += count[i - 1] + + # Place the elements in sorted order + i = size - 1 + while i >= 0: + index = array[i] // place + 
output[count[index % 10] - 1] = array[i] + count[index % 10] -= 1 + i -= 1 + + for i in range(0, size): + array[i] = output[i] + + +# Main function to implement radix sort +def radix_sort(array): + # Get maximum element + max_element = max(array) + + # Apply counting sort to sort elements based on place value. + place = 1 + while max_element // place > 0: + countingSort_radix(array, place) + place *= 10 + return array + + +def bucket_sort(arr): + n = len(arr) + buckets = [[] for _ in range(n)] + + # Put array elements in different buckets + for num in arr: + bi = min(int(n * num), n - 1) + buckets[bi].append(num) + + # Sort individual buckets using insertion sort + for bucket in buckets: + insertion_sort(bucket) + + # Concatenate all buckets into arr[] + index = 0 + for bucket in buckets: + for num in bucket: + arr[index] = num + index += 1 + return arr