-
-
Notifications
You must be signed in to change notification settings - Fork 287
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[aggr-] add rank aggregator, cmds addcol-aggregate/sheetrank
- Loading branch information
Showing
5 changed files
with
200 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!vd -p | ||
{"sheet": "global", "col": null, "row": "disp_date_fmt", "longname": "set-option", "input": "%b %d, %Y", "keystrokes": "", "comment": null} | ||
{"longname": "open-file", "input": "sample_data/test.jsonl", "keystrokes": "o"} | ||
{"sheet": "test", "col": "key2", "row": "", "longname": "key-col", "input": "", "keystrokes": "!", "comment": "toggle current column as a key column"} | ||
{"sheet": "test", "col": "key2", "row": "", "longname": "addcol-aggregate", "input": "count", "comment": "add column(s) with aggregator of rows grouped by key columns"} | ||
{"sheet": "test", "col": "qty", "row": "", "longname": "type-float", "input": "", "keystrokes": "%", "comment": "set type of current column to float"} | ||
{"sheet": "test", "col": "qty", "row": "", "longname": "addcol-aggregate", "input": "rank sum", "comment": "add column(s) with aggregator of rows grouped by key columns"} | ||
{"sheet": "test", "col": "qty_sum", "row": "", "longname": "addcol-sheetrank", "input": "", "comment": "add column with the rank of each row based on its key columns"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
key2 key2_count key1 qty qty_rank qty_sum test_sheetrank amt | ||
foo 2 2016-01-01 11:00:00 1.00 1 31.00 5 | ||
0 2016-01-01 1:00 2.00 1 66.00 2 3 | ||
baz 3 4.00 1 292.00 4 43.2 | ||
#ERR 0 #ERR #ERR 1 0.00 1 #ERR #ERR | ||
bar 2 2017-12-25 8:44 16.00 2 16.00 3 .3 | ||
baz 3 32.00 2 292.00 4 3.3 | ||
0 2018-07-27 4:44 64.00 2 66.00 2 9.1 | ||
bar 2 2018-07-27 16:44 1 16.00 3 | ||
baz 3 2018-07-27 18:44 256.00 3 292.00 4 .01 | ||
foo 2 2018-10-20 18:44 30.00 2 31.00 5 .01 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import itertools | ||
|
||
from visidata import Sheet, ListAggregator, SettableColumn | ||
from visidata import vd, anytype, asyncthread | ||
|
||
class RankAggregator(ListAggregator):
    '''
    Aggregator producing one integer rank per input row.

    Rows are grouped by their sheet row key; inside each group, rows are
    ranked by the value of the aggregated column. Ranks start at 1, and
    each distinct value gets a rank 1 higher than the previous distinct
    value. Rows with tied values receive the same rank.
    '''
    def aggregate(self, col, rows) -> [int]:
        '''Return the list of ranks for *rows*; same result as aggregate_list().'''
        return self.aggregate_list(col, rows)

    def aggregate_list(self, col, rows) -> [int]:
        '''Return the list of ranks for *rows*, in the same order as *rows*.

        Reports an error through vd.error() when the sheet of *col*
        has no key columns, because the key columns define the groups.
        '''
        if col.sheet.keyCols:
            return self.rank(col, rows)
        vd.error('ranking requires one or more key columns')
        return None

    def rank(self, col, rows):
        '''Rank the typed values of *col* within each group of rows sharing a row key.

        Returns a list with one rank per row, ordered like *rows*.
        Calls vd.fail() if the keys or values cannot be compared when sorting.
        '''
        sheet = col.sheet
        # one (group_key, rank_key, position) triple per row;
        # position records where the row sat in the input
        triples = [
            (sheet.rowkey(row), col.getTypedValue(row), position)
            for position, row in enumerate(rows)
        ]
        # sorting by group key, then by value, prepares for both groupby passes
        try:
            triples.sort()
        except TypeError as e:
            vd.fail(f'elements in a ranking column must be comparable: {e.args[0]}')

        # each position occurs exactly once, so ranks can be written straight
        # into their original slot instead of being re-sorted afterwards
        ranks_by_position = [None] * len(triples)
        for _, members in itertools.groupby(triples, key=lambda triple: triple[0]):
            # members are already ordered by value within the group
            members = list(members)
            member_ranks = rank_sorted_iterable([value for _, value, _ in members])
            for (_, _, position), member_rank in zip(members, member_ranks):
                ranks_by_position[position] = member_rank
        return ranks_by_position
|
||
# register the aggregator under the name 'rank'
vd.aggregators['rank'] = RankAggregator(
    'rank',
    anytype,
    helpstr='list of ranks, when grouping by key columns',
    listtype=int,
)
|
||
def rank_sorted_iterable(vals_sorted) -> [int]:
    '''Return a list with one rank for each element of *vals_sorted*.

    *vals_sorted* is an iterable whose elements form one group, and it
    must already be sorted. The first run of equal elements gets rank 1,
    and every following run gets a rank 1 higher than the run before it.
    '''
    ranks = []
    # groupby yields one run per stretch of consecutive equal elements
    for rank, (_, run) in enumerate(itertools.groupby(vals_sorted), start=1):
        ranks.extend(rank for _ in run)
    return ranks
|
||
@Sheet.api
@asyncthread
def addcol_sheetrank(sheet, rows):
    '''
    Add an int column named "<sheet name>_sheetrank" at the cursor,
    holding the rank of each row in *rows*.

    Rows are ordered by the value of their key columns. Ranks start at 1,
    and rows whose keys are equal share the same rank.

    Reports an error through vd.error() if the sheet has no key columns,
    and through vd.fail() if the key values cannot be compared.
    In both cases no column is added to the sheet.
    '''
    # validate before touching the sheet, so that a failed command
    # does not leave an empty column behind
    if not sheet.keyCols:
        vd.error('ranking requires one or more key columns')
        return None

    # materialize once: the rows are walked to build the keys, and the same
    # rows (in the same order) must receive the computed ranks afterwards
    rows = list(rows)
    rowkeys = [(sheet.rowkey(r), rownum) for rownum, r in enumerate(rows)]
    try:
        rowkeys.sort()
    except TypeError as e:
        vd.fail(f'key column values must be comparable to be ranked: {e.args[0]}')

    ranks = rank_sorted_iterable([rowkey for rowkey, _ in rowkeys])
    # put each rank back at the position of its row in the original order
    row_ranks = [None] * len(rows)
    for (_, rownum), rank in zip(rowkeys, ranks):
        row_ranks[rownum] = rank

    colname = f'{sheet.name}_sheetrank'
    c = SettableColumn(name=colname, type=int)
    sheet.addColumnAtCursor(c)
    # set values on the rows that were ranked, not on whatever sheet.rows holds now
    c.setValues(rows, *row_ranks)
|
||
# no default keybinding; the command is available by its longname
Sheet.addCommand(
    '',
    'addcol-sheetrank',
    'sheet.addcol_sheetrank(rows)',
    'add column with the rank of each row based on its key columns',
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters