-
-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #10 from maciejmaczko/feat/timsort-robust-implementation
Update tim-sort.ts
- Loading branch information
Showing
1 changed file
with
199 additions
and
92 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,139 +1,246 @@ | ||
import type { SortingGenerator } from './types'; | ||
import type { SortingGenerator } from "./types"; | ||
|
||
/** | ||
* The size at or below which a run is considered "small" and | ||
* will be extended using insertion sort rather than merges. | ||
* Traditionally, Timsort uses a minRun between 32 and 64. | ||
*/ | ||
const MIN_MERGE = 32; | ||
|
||
const minRunLength = (n: number) => { | ||
// Becomes 1 if any 1 bits are shifted off | ||
/** | ||
* Computes the minimum run length for Timsort given an array length. | ||
* This follows Timsort's logic: | ||
* - Repeatedly shift right until n < MIN_MERGE. | ||
* - Track if any bits were shifted off (r), and add that to final result. | ||
*/ | ||
function minRunLength(n: number): number { | ||
let r = 0; | ||
while (n >= MIN_MERGE) { | ||
r |= n & 1; | ||
n >>= 1; | ||
} | ||
return n + r; | ||
}; | ||
|
||
// This function sorts array from left index to | ||
// to right index which is of size atmost RUN | ||
const insertionSort = function* ( | ||
} | ||
|
||
/** | ||
* Sorts a subarray using insertion sort. | ||
* This is used by Timsort to extend small runs to ensure they are at least minRun long. | ||
* | ||
* @param arr - The array to sort. | ||
* @param start - Start index of the subarray. | ||
* @param end - End index of the subarray (inclusive). | ||
*/ | ||
function* insertionSort( | ||
arr: number[], | ||
left: number, | ||
right: number | ||
start: number, | ||
end: number | ||
): SortingGenerator { | ||
for (let i = left + 1; i <= right; i++) { | ||
for (let i = start + 1; i <= end; i++) { | ||
const temp = arr[i]; | ||
let j = i - 1; | ||
|
||
while (j >= left && arr[j] > temp) { | ||
while (j >= start && arr[j] > temp) { | ||
yield { access: [i, j], sound: j }; | ||
arr[j + 1] = arr[j]; | ||
j--; | ||
} | ||
arr[j + 1] = temp; | ||
} | ||
}; | ||
|
||
// Merge function merges the sorted runs | ||
const merge = function* ( | ||
} | ||
|
||
/** | ||
* Scans forward from `start` to find a run (ascending or strictly descending). | ||
* If run is descending, it is reversed. | ||
* Returns the end index of the run (inclusive). | ||
* | ||
* @param arr - The array. | ||
* @param start - Start index to detect run. | ||
* @param n - Total length of the array. | ||
*/ | ||
function* countRunAndMakeAscending( | ||
arr: number[], | ||
l: number, | ||
m: number, | ||
r: number | ||
): SortingGenerator { | ||
// Original array is broken in two parts | ||
// left and right array | ||
const len1 = m - l + 1, | ||
len2 = r - m; | ||
const left = new Array(len1); | ||
const right = new Array(len2); | ||
for (let x = 0; x < len1; x++) { | ||
yield { access: [l + x], sound: l + x }; | ||
left[x] = arr[l + x]; | ||
start: number, | ||
n: number | ||
): SortingGenerator<number> { | ||
let runEnd = start + 1; | ||
if (runEnd === n) { | ||
// Single element run | ||
return start; | ||
} | ||
for (let x = 0; x < len2; x++) { | ||
yield { access: [m + 1 + x], sound: m + 1 + x }; | ||
right[x] = arr[m + 1 + x]; | ||
|
||
// Determine if run is ascending or descending | ||
yield { access: [start, runEnd], sound: runEnd }; | ||
if (arr[start] <= arr[runEnd]) { | ||
// Ascending run | ||
while (runEnd < n - 1 && arr[runEnd] <= arr[runEnd + 1]) { | ||
yield { access: [runEnd, runEnd + 1], sound: runEnd + 1 }; | ||
runEnd++; | ||
} | ||
} else { | ||
// Descending run | ||
while (runEnd < n - 1 && arr[runEnd] > arr[runEnd + 1]) { | ||
yield { access: [runEnd, runEnd + 1], sound: runEnd + 1 }; | ||
runEnd++; | ||
} | ||
|
||
// Reverse the descending run | ||
let left = start; | ||
let right = runEnd; | ||
while (left < right) { | ||
yield { access: [left, right], sound: left }; | ||
[arr[left], arr[right]] = [arr[right], arr[left]]; | ||
left++; | ||
right--; | ||
} | ||
} | ||
|
||
return runEnd; | ||
} | ||
|
||
/** | ||
* Merges two sorted sub-runs: | ||
* arr[start..mid] and arr[mid+1..end] | ||
* | ||
* This is a standard merging procedure but may be enhanced with Timsort's "galloping" optimization. | ||
* For simplicity, we will not implement galloping here, just a standard merge. | ||
* | ||
* @param arr - The array. | ||
* @param start - Start index of first run. | ||
* @param mid - End index of first run. | ||
* @param end - End index of second run. | ||
*/ | ||
function* merge( | ||
arr: number[], | ||
start: number, | ||
mid: number, | ||
end: number | ||
): SortingGenerator { | ||
const len1 = mid - start + 1; | ||
const len2 = end - mid; | ||
const left = arr.slice(start, mid + 1); | ||
const right = arr.slice(mid + 1, end + 1); | ||
|
||
let i = 0; | ||
let j = 0; | ||
let k = l; | ||
let k = start; | ||
|
||
// After comparing, we merge those two | ||
// array in larger sub array | ||
while (i < len1 && j < len2) { | ||
yield { access: [k], sound: k }; | ||
|
||
if (left[i] <= right[j]) { | ||
arr[k] = left[i]; | ||
i++; | ||
arr[k++] = left[i++]; | ||
} else { | ||
arr[k] = right[j]; | ||
j++; | ||
arr[k++] = right[j++]; | ||
} | ||
k++; | ||
} | ||
|
||
// Copy remaining elements | ||
// of left, if any | ||
while (i < len1) { | ||
yield { access: [k], sound: k }; | ||
|
||
arr[k] = left[i]; | ||
k++; | ||
i++; | ||
arr[k++] = left[i++]; | ||
} | ||
|
||
// Copy remaining element | ||
// of right, if any | ||
while (j < len2) { | ||
yield { access: [k], sound: k }; | ||
|
||
arr[k] = right[j]; | ||
k++; | ||
j++; | ||
arr[k++] = right[j++]; | ||
} | ||
}; | ||
|
||
// Iterative Timsort function to sort the | ||
// array[0...n-1] (similar to merge sort) | ||
export const timSort = function* (arr: number[]): SortingGenerator { | ||
} | ||
|
||
/** | ||
* Timsort main function: | ||
* | ||
* Steps: | ||
* 1. Calculate minRun. | ||
* 2. Identify natural runs, extend them to length at least minRun with insertion sort. | ||
* 3. Push these runs to a stack. | ||
* 4. Merge runs from the stack according to Timsort’s merge rules until only one run remains. | ||
*/ | ||
export function* timSort(arr: number[]): SortingGenerator { | ||
const n = arr.length; | ||
const minRun = minRunLength(MIN_MERGE); | ||
if (n < 2) { | ||
return; | ||
} | ||
|
||
// Sort individual subarrays of size RUN | ||
for (let i = 0; i < n; i += minRun) { | ||
yield { access: [i], sound: i }; | ||
const minRun = minRunLength(n); | ||
|
||
yield* insertionSort(arr, i, Math.min(i + MIN_MERGE - 1, n - 1)); | ||
} | ||
// This stack will store runs as [startIndex, length] | ||
const runStack: Array<[number, number]> = []; | ||
|
||
let current = 0; | ||
while (current < n) { | ||
// Identify run | ||
const runEnd = yield* countRunAndMakeAscending(arr, current, n); | ||
let runLen = runEnd - current + 1; | ||
|
||
// Ensure run has length at least minRun | ||
if (runLen < minRun) { | ||
const forceLen = Math.min(minRun, n - current); | ||
// Sort the small run using insertion sort | ||
yield* insertionSort(arr, current, current + forceLen - 1); | ||
runLen = forceLen; | ||
} | ||
|
||
// Start merging from size | ||
// RUN (or 32). It will | ||
// merge to form size 64, | ||
// then 128, 256 and so on | ||
// .... | ||
for (let size = minRun; size < n; size = 2 * size) { | ||
// Pick starting point | ||
// of left sub array. We | ||
// are going to merge | ||
// arr[left..left+size-1] | ||
// and arr[left+size, left+2*size-1] | ||
// After every merge, we | ||
// increase left by 2*size | ||
for (let left = 0; left < n; left += 2 * size) { | ||
// Find ending point of left sub array | ||
// mid+1 is starting point of right sub | ||
// array | ||
const mid = left + size - 1; | ||
const right = Math.min(left + 2 * size - 1, n - 1); | ||
|
||
yield { access: [left, right], sound: right }; | ||
|
||
// Merge sub array arr[left.....mid] & | ||
// arr[mid+1....right] | ||
if (mid < right) { | ||
yield* merge(arr, left, mid, right); | ||
// Push run onto stack | ||
runStack.push([current, runLen]); | ||
|
||
// Timsort merging rules: | ||
// While we have at least two runs on the stack: | ||
// Check if merging is required according to Timsort constraints. | ||
while (runStack.length > 1) { | ||
const n = runStack.length; | ||
const [baseZ, lenZ] = runStack[n - 1]; | ||
const [baseY, lenY] = runStack[n - 2]; | ||
|
||
const shouldMerge = | ||
(n > 2 && | ||
runStack[n - 3][1] <= runStack[n - 2][1] + runStack[n - 1][1]) || | ||
(lenY <= lenZ); | ||
|
||
if (!shouldMerge) break; | ||
|
||
if (n > 2) { | ||
// Compare also run Y and run X (the ones below) | ||
const [baseX, lenX] = runStack[n - 3]; | ||
if (lenX <= lenY + lenZ) { | ||
// Merge Y with smaller of Z or X | ||
if (lenX < lenZ) { | ||
yield* doMerge(arr, runStack, n - 3, n - 2); | ||
} else { | ||
yield* doMerge(arr, runStack, n - 2, n - 1); | ||
} | ||
continue; | ||
} | ||
} | ||
|
||
yield* doMerge(arr, runStack, n - 2, n - 1); | ||
} | ||
|
||
current += runLen; | ||
} | ||
}; | ||
|
||
// Merge all remaining runs | ||
while (runStack.length > 1) { | ||
const n = runStack.length; | ||
// Merge the last two runs | ||
yield* doMerge(arr, runStack, n - 2, n - 1); | ||
} | ||
} | ||
|
||
/** | ||
* Merges the two runs stored at runStack[i] and runStack[j] and replaces them | ||
* with a single combined run stored at index i. | ||
* | ||
* The merged range is computed from the first run's base and the two lengths | ||
* (the second run's base is only used in the yielded step), so the run at j is | ||
* expected to start immediately after the run at i ends. The callers visible in | ||
* this file always pass j = i + 1. | ||
* | ||
* @param arr - The array being sorted; the merge writes into it in place. | ||
* @param runStack - Stack of runs stored as [startIndex, length]; updated in place. | ||
* @param i - Stack index of the first (left) run. | ||
* @param j - Stack index of the second (right) run. | ||
*/ | ||
function* doMerge( | ||
arr: number[], | ||
runStack: Array<[number, number]>, | ||
i: number, | ||
j: number | ||
): SortingGenerator { | ||
const [base1, len1] = runStack[i]; | ||
const [base2, len2] = runStack[j]; | ||
|
||
// Yield one step that references the start index of both runs before merging | ||
yield { access: [base1, base2], sound: base2 }; | ||
|
||
// Merge runs arr[base1..base1+len1-1] and arr[base2..base2+len2-1] | ||
// The end index is derived from base1, which relies on the two runs being adjacent | ||
yield* merge(arr, base1, base1 + len1 - 1, base1 + len1 + len2 - 1); | ||
|
||
// Update the run stack, replacing the two runs with the merged run: | ||
// slot i takes the combined length, then slot j is removed | ||
runStack[i] = [base1, len1 + len2]; | ||
runStack.splice(j, 1); | ||
} |