Skip to content

Commit

Permalink
support stats/extended_stats result extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
onesuper committed Nov 16, 2016
1 parent 131c369 commit 610f263
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

### 0.2.0

* support metric agg: `stats`, `extended_stats`
* support boolean filter: `like`, `rlike`, `startswith`, `notnull`
* display time in `df.show()`

Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ df.sort(ScriptSorter('doc["age"].value * 2')).collect()
df[df.gender == 'male'].agg(df.age.avg).collect()
# [Row(avg(age)=12)]

# Groupby
# Groupby only (will give the `doc_count`)
df.groupby('gender').collect()
# [Row(doc_count=1), Row(doc_count=2)]

Expand All @@ -120,6 +120,10 @@ df[df.gender == 'male'].agg(df.age.avg).to_pandas()
# avg(age)
# 0 12

# Advanced ES functionality
df.groupby(df.gender).agg(df.age.stats).to_pandas()
df.agg(df.age.extended_stats).to_pandas()
df.agg(df.age.percentiles).to_pandas()
```


Expand Down
4 changes: 2 additions & 2 deletions pandasticsearch/operators.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: UTF-8 -*-

_metric_aggs = ('avg', 'min', 'max', 'cardinality', 'value_count', 'sum'
'percentiles', 'percentile_ranks')
_metric_aggs = ('avg', 'min', 'max', 'cardinality', 'value_count', 'sum',
'percentiles', 'percentile_ranks', 'stats', 'extended_stats')

_sort_mode = ('min', 'max', 'sum', 'avg', 'median')

Expand Down
7 changes: 5 additions & 2 deletions pandasticsearch/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,11 @@ def _process_agg(cls, bucket, indexes=(), names=()):
row[k] = v['value']
elif 'values' in v: # percentiles
row = v['values']
if k == 'doc_count': # count docs
row['doc_count'] = v
else:
row.update(v) # stats
else:
if k == 'doc_count': # count docs
row['doc_count'] = v

if len(row) > 0:
yield (names, indexes, row)
Expand Down
8 changes: 8 additions & 0 deletions pandasticsearch/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,14 @@ def percentiles(self):
def percentile_ranks(self):
return MetricAggregator(self._field, 'percentile_ranks')

# Read-only accessor: builds a MetricAggregator over this object's `_field`
# using the aggregation name 'stats', so it can be passed to `agg(...)`
# (e.g. `df.agg(df.age.stats)`).
# NOTE(review): presumably maps to the Elasticsearch `stats` metric
# aggregation (count/min/max/avg/sum) -- confirm against MetricAggregator.
@property
def stats(self):
return MetricAggregator(self._field, 'stats')

# Read-only accessor: builds a MetricAggregator over this object's `_field`
# using the aggregation name 'extended_stats', so it can be passed to
# `agg(...)` (e.g. `df.agg(df.age.extended_stats)`).
# NOTE(review): presumably maps to the Elasticsearch `extended_stats` metric
# aggregation (stats plus variance/std-deviation figures) -- confirm against
# MetricAggregator.
@property
def extended_stats(self):
return MetricAggregator(self._field, 'extended_stats')


class Row(tuple):
"""
Expand Down

0 comments on commit 610f263

Please sign in to comment.