/**
 * Tim Sort (src/lib/sort-algorithms/tim-sort.ts).
 *
 * Provenance: post-image of patch 00e0cb8e ("Update tim-sort.ts -
 * refactor(tim-sort): Implement robust Timsort algorithm with advanced run
 * detection", maciejmaczko, 2024-12-14). That commit replaced the previous
 * implementation (insertion sort on fixed-size chunks followed by merges of
 * doubling width) with natural-run detection and a run stack, so that valley
 * shaped and other partially ordered inputs are handled properly.
 *
 * Every generator in this module yields `{ access, sound }` steps naming the
 * indices it is touching (presumably consumed by a visualiser - confirm
 * against ./types).
 */
import type { SortingGenerator } from "./types";

/**
 * Shape of the step objects yielded throughout this module.
 * NOTE(review): assumed to match the yield type of `SortingGenerator`;
 * confirm against ./types.
 */
type SortStep = { access: number[]; sound: number };

/** A pending run on the merge stack: `[startIndex, length]`. */
type Run = [number, number];

/**
 * Arrays shorter than this are sorted as a single run. For longer arrays
 * `minRunLength` derives a minimum run length from it; with the value 32 the
 * derived minimum run length lies in the range 16..32.
 */
const MIN_MERGE = 32;

/**
 * Computes the minimum run length for an array of length `n`.
 *
 * `n` is halved until it drops below `MIN_MERGE`; if any 1-bit was shifted
 * out on the way, 1 is added to the result. For `n < MIN_MERGE` the result is
 * `n` itself.
 *
 * @param n - Length of the array being sorted.
 * @returns The minimum length every run is extended to.
 */
function minRunLength(n: number): number {
  let r = 0; // becomes 1 as soon as any 1-bit is shifted off
  while (n >= MIN_MERGE) {
    r |= n & 1;
    n >>= 1;
  }
  return n + r;
}

/**
 * Sorts `arr[start..end]` in place with insertion sort.
 *
 * @param arr - The array to sort.
 * @param start - First index of the sub-array.
 * @param end - Last index of the sub-array (inclusive).
 * @param sortedUntil - Last index of a prefix `arr[start..sortedUntil]` that
 *   the caller knows is already ascending. Insertion starts right after it.
 *   Defaults to `start`, i.e. nothing is assumed to be sorted.
 */
function* insertionSort(
  arr: number[],
  start: number,
  end: number,
  sortedUntil = start
): SortingGenerator {
  for (let i = Math.max(start, sortedUntil) + 1; i <= end; i++) {
    const temp = arr[i];
    let j = i - 1;
    while (j >= start && arr[j] > temp) {
      yield { access: [i, j], sound: j };
      arr[j + 1] = arr[j];
      j--;
    }
    arr[j + 1] = temp;
  }
}

/**
 * Finds the natural run beginning at `start` and makes sure it is ascending.
 *
 * A run is either non-decreasing or strictly decreasing. A decreasing run is
 * reversed in place; requiring it to be *strictly* decreasing means equal
 * elements are never swapped past each other by the reversal.
 *
 * @param arr - The array being sorted.
 * @param start - Index at which the run starts.
 * @param n - Total length of the array.
 * @returns (as the generator's return value) the last index of the run,
 *   inclusive.
 */
function* countRunAndMakeAscending(
  arr: number[],
  start: number,
  n: number
): Generator<SortStep, number, unknown> {
  let runEnd = start + 1;
  if (runEnd === n) {
    // Only one element left: it forms a run by itself.
    return start;
  }

  // The first pair decides the direction of the run.
  yield { access: [start, runEnd], sound: runEnd };
  if (arr[start] <= arr[runEnd]) {
    while (runEnd < n - 1 && arr[runEnd] <= arr[runEnd + 1]) {
      yield { access: [runEnd, runEnd + 1], sound: runEnd + 1 };
      runEnd++;
    }
    return runEnd;
  }

  while (runEnd < n - 1 && arr[runEnd] > arr[runEnd + 1]) {
    yield { access: [runEnd, runEnd + 1], sound: runEnd + 1 };
    runEnd++;
  }

  // Reverse the descending run so that it becomes ascending.
  for (let left = start, right = runEnd; left < right; left++, right--) {
    yield { access: [left, right], sound: left };
    [arr[left], arr[right]] = [arr[right], arr[left]];
  }
  return runEnd;
}

/**
 * Merges the adjacent sorted ranges `arr[start..mid]` and `arr[mid+1..end]`
 * in place. On ties the element from the left range is taken first.
 *
 * This is a plain two-way merge; Timsort's "galloping" optimisation is not
 * implemented.
 *
 * @param arr - The array being sorted.
 * @param start - First index of the left range.
 * @param mid - Last index of the left range.
 * @param end - Last index of the right range.
 */
function* merge(
  arr: number[],
  start: number,
  mid: number,
  end: number
): SortingGenerator {
  const left = arr.slice(start, mid + 1);
  const right = arr.slice(mid + 1, end + 1);

  let i = 0;
  let j = 0;
  let k = start;

  while (i < left.length && j < right.length) {
    yield { access: [k], sound: k };
    if (left[i] <= right[j]) {
      arr[k++] = left[i++];
    } else {
      arr[k++] = right[j++];
    }
  }

  while (i < left.length) {
    yield { access: [k], sound: k };
    arr[k++] = left[i++];
  }

  while (j < right.length) {
    yield { access: [k], sound: k };
    arr[k++] = right[j++];
  }
}

/**
 * Merges the runs at `runStack[i]` and `runStack[j]` and replaces them with
 * the single combined run.
 *
 * The two runs must be adjacent in the array and on the stack
 * (`j === i + 1`): the merged range is computed from the first run's base
 * and the two lengths only.
 *
 * @param arr - The array being sorted.
 * @param runStack - Stack of pending runs; modified in place.
 * @param i - Stack index of the first (lower) run.
 * @param j - Stack index of the second run.
 */
function* doMerge(
  arr: number[],
  runStack: Array<[number, number]>,
  i: number,
  j: number
): SortingGenerator {
  const [base1, len1] = runStack[i];
  const [base2, len2] = runStack[j];

  yield { access: [base1, base2], sound: base2 };

  // Merge arr[base1..base1+len1-1] with arr[base2..base2+len2-1].
  yield* merge(arr, base1, base1 + len1 - 1, base1 + len1 + len2 - 1);

  runStack[i] = [base1, len1 + len2];
  runStack.splice(j, 1);
}

/**
 * Restores the run-stack invariants after a push. With X, Y, Z the top three
 * run lengths (Z on top) the stack must satisfy `X > Y + Z` and `Y > Z`.
 *
 * The first invariant is also checked one level further down (W, X, Y).
 * Checking only the top three runs can leave a violation deeper in the
 * stack after a merge.
 */
function* mergeCollapse(arr: number[], runStack: Run[]): SortingGenerator {
  const lenAt = (k: number): number => runStack[k][1];

  while (runStack.length > 1) {
    let i = runStack.length - 2; // index of Y; Z is at i + 1
    if (
      (i > 0 && lenAt(i - 1) <= lenAt(i) + lenAt(i + 1)) ||
      (i > 1 && lenAt(i - 2) <= lenAt(i - 1) + lenAt(i))
    ) {
      // Merge Y with the smaller of its neighbours X and Z.
      if (lenAt(i - 1) < lenAt(i + 1)) {
        i--;
      }
    } else if (lenAt(i) > lenAt(i + 1)) {
      break; // both invariants hold
    }
    yield* doMerge(arr, runStack, i, i + 1);
  }
}

/**
 * Merges every run left on the stack into one, each time merging the
 * second-from-top run with its smaller neighbour.
 */
function* mergeForceCollapse(arr: number[], runStack: Run[]): SortingGenerator {
  while (runStack.length > 1) {
    let i = runStack.length - 2;
    if (i > 0 && runStack[i - 1][1] < runStack[i + 1][1]) {
      i--;
    }
    yield* doMerge(arr, runStack, i, i + 1);
  }
}

/**
 * Sorts `arr` in place in ascending order using Timsort.
 *
 * Steps:
 * 1. Compute the minimum run length for the array.
 * 2. Repeatedly detect the next natural run; if it is shorter than the
 *    minimum run length, extend it with insertion sort.
 * 3. Push the run onto a stack and merge runs until the stack invariants
 *    hold again.
 * 4. Once the whole array has been scanned, merge the remaining runs.
 *
 * Arrays with fewer than two elements are left untouched and yield nothing.
 *
 * @param arr - The array to sort; it is modified in place.
 */
export function* timSort(arr: number[]): SortingGenerator {
  const n = arr.length;
  if (n < 2) {
    return;
  }

  const minRun = minRunLength(n);
  const runStack: Run[] = [];

  let current = 0;
  while (current < n) {
    const runEnd = yield* countRunAndMakeAscending(arr, current, n);
    let runLen = runEnd - current + 1;

    if (runLen < minRun) {
      // Extend the short run. arr[current..runEnd] is already ascending, so
      // insertion starts right after it.
      const forceLen = Math.min(minRun, n - current);
      yield* insertionSort(arr, current, current + forceLen - 1, runEnd);
      runLen = forceLen;
    }

    runStack.push([current, runLen]);
    yield* mergeCollapse(arr, runStack);

    current += runLen;
  }

  yield* mergeForceCollapse(arr, runStack);
}