Skip to content

Commit

Permalink
Finish implementation of Table.from_json. Closes #344.
Browse files Browse the repository at this point in the history
  • Loading branch information
onyxfish committed Oct 31, 2015
2 parents acb7406 + b8720fe commit 708d0c8
Show file tree
Hide file tree
Showing 8 changed files with 251 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

Version 1.1.0 introduces one major breaking API change. The Table, Table.from_csv and TableSet.from_csv methods now all take "column_names" and "column_types" as separate arguments instead of as a list of tuples. This was done to facilitate type inference and to streamline the API in various other places.

* Table.from_json is implemented. (#344, #347)
* Date and DateTime type testing now takes specified format into account. (#361)
* Number column now takes a float_precision argument.
* Number columns can now be populated with float values. (#370)
Expand Down
152 changes: 122 additions & 30 deletions agate/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
from itertools import chain
import sys

if sys.version_info < (2, 7):
import simplejson as json
else:
import json

try:
from collections import OrderedDict
except ImportError: # pragma: no cover
Expand All @@ -47,7 +52,7 @@
from agate.mapped_sequence import MappedSequence
from agate.preview import print_table, print_bars
from agate.rows import Row
from agate.utils import NullOrder, Patchable, max_precision, make_number_formatter, round_limits, letter_name
from agate.utils import NullOrder, Patchable, max_precision, make_number_formatter, round_limits, letter_name, parse_object

if six.PY2:
from agate import csv_py2 as csv
Expand Down Expand Up @@ -179,6 +184,44 @@ def __init__(self, rows, column_names=None, column_types=None, row_names=None, _

self._columns = MappedSequence(new_columns, self._column_names)


@property
def column_types(self):
    """
    Get an ordered sequence of this table's column types.

    :returns: A sequence of :class:`.DataType` instances.
    """
    return self._column_types

@property
def column_names(self):
    """
    Get an ordered sequence of this table's column names.

    :returns: The names held in the table's ``_column_names`` attribute.
    """
    return self._column_names

@property
def row_names(self):
    """
    Get an ordered sequence of this table's row names.

    :returns: The names held in the table's ``_row_names`` attribute.
    """
    return self._row_names

@property
def columns(self):
    """
    Get this table's :class:`.MappedSequence` of columns.
    """
    return self._columns

@property
def rows(self):
    """
    Get this table's :class:`.MappedSequence` of rows.
    """
    return self._rows

def _fork(self, rows, column_names=None, column_types=None, row_names=None):
"""
Create a new table using the metadata from this one.
Expand Down Expand Up @@ -262,42 +305,91 @@ def to_csv(self, path, **kwargs):
if close:
f.close()

@property
def column_types(self):
@classmethod
def from_json(cls, path, row_names=None, key=None, **kwargs):
    """
    Create a new table from a JSON file. Contents should be an array
    containing a dictionary for each "row". Nested objects or lists will
    also be parsed. For example, this object:

    .. code-block:: javascript

        {
            'one': {
                'a': 1,
                'b': 2,
                'c': 3
            },
            'two': [4, 5, 6],
            'three': 'd'
        }

    Would generate these columns and values:

    .. code-block:: python

        {
            'one/a': 1,
            'one/b': 2,
            'one/c': 3,
            'two/0': 4,
            'two/1': 5,
            'two/2': 6,
            'three': 'd'
        }

    Column names and types will be inferred from the data. Not all rows are
    required to have the same keys. Missing elements will be filled in with
    null.

    If the file contains a top-level dictionary you may specify what
    property contains the row list using the ``key`` parameter.

    ``kwargs`` will be passed through to :meth:`json.load`.

    :param path:
        Filepath or file-like object from which to read JSON data.
    :param row_names:
        See :meth:`Table.__init__`.
    :param key:
        The key of the top-level dictionary that contains a list of row
        objects.
    :raises TypeError:
        If the document's top-level element is a dictionary and no ``key``
        was given.
    """
    # object_pairs_hook keeps the key order of the document so that column
    # order follows the order in which keys first appear.
    if hasattr(path, 'read'):
        js = json.load(path, object_pairs_hook=OrderedDict, **kwargs)
    else:
        with open(path, 'r') as f:
            js = json.load(f, object_pairs_hook=OrderedDict, **kwargs)

    if isinstance(js, dict):
        if not key:
            raise TypeError('When converting a JSON document with a top-level dictionary element, a key must be specified.')

        js = js[key]

    # An OrderedDict serves as an ordered set: constant-time membership
    # while remembering the order in which column names were first seen.
    seen_names = OrderedDict()
    row_objects = []

    for obj in js:
        parsed = parse_object(obj)

        # A distinct loop name avoids clobbering the ``key`` parameter.
        for name in parsed:
            seen_names.setdefault(name, None)

        row_objects.append(parsed)

    column_names = list(seen_names)

    # Rows lacking a column get None so every row has the same width.
    rows = [
        [parsed.get(name, None) for name in column_names]
        for parsed in row_objects
    ]

    return Table(rows, column_names, row_names=row_names)

@allow_tableset_proxy
def select(self, selected_names):
Expand Down
27 changes: 27 additions & 0 deletions agate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,18 @@
from functools import wraps
import string

try:
from collections import OrderedDict
except ImportError: # pragma: no cover
from ordereddict import OrderedDict

try:
from cdecimal import Decimal, ROUND_FLOOR, ROUND_CEILING
except ImportError: #pragma: no cover
from decimal import Decimal, ROUND_FLOOR, ROUND_CEILING

import six

def memoize(func):
"""
Dead-simple memoize decorator for instance methods that take no arguments.
Expand Down Expand Up @@ -205,3 +212,23 @@ def letter_name(index):
count = len(letters)

return letters[index % count] * ((index // count) + 1)

def parse_object(obj, path=''):
    """
    Recursively flatten a parsed JSON value into a dictionary that maps
    slash-delimited paths to leaf values.

    Dictionary keys and list/tuple indices become path segments, so
    ``{'a': {'b': [1, 2]}}`` yields ``{'a/b/0': 1, 'a/b/1': 2}``. Empty
    dictionaries and lists contribute no entries.

    Inspired by JSONPipe (https://github.com/dvxhouse/jsonpipe).

    :param obj:
        The value to flatten: a dict, a list or tuple, or a scalar leaf.
    :param path:
        Prefix accumulated by the recursion, with a ``/`` after every
        segment. Callers normally leave this at its default.
    :returns:
        An :class:`OrderedDict` of path/value pairs for containers, or a
        single-item dict ``{path: obj}`` when ``obj`` is a leaf.
    """
    if isinstance(obj, dict):
        iterator = obj.items()
    elif isinstance(obj, (list, tuple)):
        iterator = enumerate(obj)
    else:
        # Remove only the one separator appended by the recursive call.
        # strip('/') would also eat slashes that are part of the keys,
        # making distinct keys such as '/a' and 'a' collide.
        if path.endswith('/'):
            path = path[:-1]

        return {path: obj}

    flattened = OrderedDict()

    for key, value in iterator:
        # List indices are ints; normalise every segment to text.
        segment = six.text_type(key)
        flattened.update(parse_object(value, path + segment + '/'))

    return flattened
17 changes: 17 additions & 0 deletions examples/test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[
{
"one": 1,
"two": 4,
"three": "a"
},
{
"one": 2,
"two": 3,
"three": "b"
},
{
"one": null,
"two": 2,
"three": "👍"
}
]
1 change: 1 addition & 0 deletions requirements-py2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ Babel>=2.0
parsedatetime>=1.5
pytz>=2015.4
mock>=1.3.0
simplejson>=2.1
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

# sys.version_info is a five-element tuple, so comparing it for equality with
# (2, 6) is never true. Use the same "older than 2.7" test that agate/table.py
# uses when deciding whether to import simplejson.
if sys.version_info < (2, 7):
    # Backports of modules that only joined the standard library in 2.7.
    install_requires.append('ordereddict>=1.1')
    install_requires.append('simplejson>=2.1')

setup(
name='agate',
Expand Down
81 changes: 81 additions & 0 deletions tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,87 @@ def test_from_csv(self):
def test_from_csv_columns_and_header(self):
column_names = ['a', 'b', 'c']

def test_from_json(self):
    # Load from a filesystem path; column types are inferred from the data.
    table = Table.from_json('examples/test.json')

    expected_types = (Number, Number, Text)
    expected_rows = (
        [1, 4, 'a'],
        [2, 3, 'b'],
        [None, 2, u'👍'],
    )

    self.assertEqual(len(table.columns), 3)
    self.assertEqual(len(table.rows), 3)

    self.assertSequenceEqual(table.column_names, ['one', 'two', 'three'])

    for index, data_type in enumerate(expected_types):
        self.assertIsInstance(table.columns[index].data_type, data_type)

    for index, values in enumerate(expected_rows):
        self.assertSequenceEqual(table.rows[index], values)

def test_from_json_file_like_object(self):
    # Same fixture as test_from_json, but passed as an open file object.
    with open('examples/test.json') as f:
        table = Table.from_json(f)

    expected_types = (Number, Number, Text)
    expected_rows = (
        [1, 4, 'a'],
        [2, 3, 'b'],
        [None, 2, u'👍'],
    )

    self.assertEqual(len(table.columns), 3)
    self.assertEqual(len(table.rows), 3)

    self.assertSequenceEqual(table.column_names, ['one', 'two', 'three'])

    for index, data_type in enumerate(expected_types):
        self.assertIsInstance(table.columns[index].data_type, data_type)

    for index, values in enumerate(expected_rows):
        self.assertSequenceEqual(table.rows[index], values)

def test_from_json_with_key(self):
    # The row list sits under the 'data' property of a top-level object.
    table = Table.from_json('examples/test_key.json', key='data')

    expected_types = (Number, Number, Text)
    expected_rows = (
        [1, 4, 'a'],
        [2, 3, 'b'],
        [None, 2, u'👍'],
    )

    self.assertEqual(len(table.columns), 3)
    self.assertEqual(len(table.rows), 3)

    self.assertSequenceEqual(table.column_names, ['one', 'two', 'three'])

    for index, data_type in enumerate(expected_types):
        self.assertIsInstance(table.columns[index].data_type, data_type)

    for index, values in enumerate(expected_rows):
        self.assertSequenceEqual(table.rows[index], values)

def test_from_json_mixed_keys(self):
    # Rows carry different key sets; absent values must come back as None.
    table = Table.from_json('examples/test_mixed.json')

    expected_types = (Number, Number, Text, Text, Number)
    expected_rows = (
        [1, 4, 'a', None, None],
        [2, 3, 'b', 'd', None],
        [None, 2, u'👍', None, 5],
    )

    self.assertEqual(len(table.columns), 5)
    self.assertEqual(len(table.rows), 3)

    self.assertSequenceEqual(table.column_names, ['one', 'two', 'three', 'four', 'five'])

    for index, data_type in enumerate(expected_types):
        self.assertIsInstance(table.columns[index].data_type, data_type)

    for index, values in enumerate(expected_rows):
        self.assertSequenceEqual(table.rows[index], values)

def test_from_json_nested(self):
    # Nested objects and lists are flattened into slash-delimited columns.
    table = Table.from_json('examples/test_nested.json')

    expected_types = (Number, Text, Text, Text, Number, Text)
    expected_rows = (
        [1, 'a', 'b', 'a', 2, 'c'],
        [2, 'c', 'd', 'd', 2, 'f'],
    )

    self.assertEqual(len(table.columns), 6)
    self.assertEqual(len(table.rows), 2)

    self.assertSequenceEqual(table.column_names, ['one', 'two/two_a', 'two/two_b', 'three/0', 'three/1', 'three/2'])

    for index, data_type in enumerate(expected_types):
        self.assertIsInstance(table.columns[index].data_type, data_type)

    for index, values in enumerate(expected_rows):
        self.assertSequenceEqual(table.rows[index], values)

def test_from_csv_file_like_object(self):
table = Table.from_csv('examples/test.csv', column_names, self.column_types)

self.assertEqual(len(table.columns), 3)
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ deps=
{[testenv:py27]deps}
unittest2==0.5.1
ordereddict>=1.1
simplejson>=2.1

[testenv:py33]
deps=
Expand Down

0 comments on commit 708d0c8

Please sign in to comment.