Skip to content

Commit

Permalink
Merge pull request #16 from chimpler/list-of-dict-merge-fix
Browse files Browse the repository at this point in the history
Merging of list of dictionaries
  • Loading branch information
darthbear committed Apr 20, 2015
2 parents f368bf3 + fdb539e commit dc208c8
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 56 deletions.
86 changes: 45 additions & 41 deletions pyhocon/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import re
import os
import socket
from pyhocon.config_tree import ConfigTree, ConfigSubstitution, ConfigList, ConfigValues, ConfigUnquotedString
from pyhocon.config_tree import ConfigTree, ConfigSubstitution, ConfigList, ConfigValues, ConfigUnquotedString, \
ConfigInclude
from pyhocon.exceptions import ConfigSubstitutionException
from pyparsing import *

Expand All @@ -12,6 +13,7 @@
except ImportError:
# Fall back to Python 2's urllib2
from urllib2 import urlopen

use_urllib2 = True


Expand Down Expand Up @@ -82,6 +84,7 @@ def parse(content, basedir=None):
:type content: basestring
:return: a ConfigTree or a list
"""

def norm_string(value):
for k, v in ConfigParser.REPLACEMENTS.items():
value = value.replace(k, v)
Expand Down Expand Up @@ -134,7 +137,7 @@ def include_config(token):
path = file if basedir is None else os.path.join(basedir, file)
obj = ConfigFactory.parse_file(path)

return [obj]
return ConfigInclude(obj if isinstance(obj, list) else obj.items())

ParserElement.setDefaultWhitespaceChars(' \t')

Expand All @@ -147,7 +150,8 @@ def include_config(token):
eol = Word('\n\r').suppress()
eol_comma = Word('\n\r,').suppress()
comment = Suppress(Optional(eol_comma) + (Literal('#') | Literal('//')) - SkipTo(eol))
number_expr = Regex('[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE]\d+)?(?=[ \t]*([\$\}\],#\n\r]|//))', re.DOTALL).setParseAction(convert_number)
number_expr = Regex('[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE]\d+)?(?=[ \t]*([\$\}\],#\n\r]|//))',
re.DOTALL).setParseAction(convert_number)

# multi line string using """
# Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
Expand All @@ -159,32 +163,34 @@ def include_config(token):
# line1 \
# line2 \
# so a backslash precedes the \n
unquoted_string = Regex(r'(\\[ \t]*[\r\n]|[^\[\{\n\]\}#,=\$])+?(?=(\$|[ \t]*(//|[\}\],#\n\r])))', re.DOTALL).setParseAction(unescape_string)
substitution_expr = Regex('\$\{[^\}]+\}\s*').setParseAction(create_substitution)
unquoted_string = Regex(r'(\\[ \t]*[\r\n]|[^\[\{\n\]\}#,=\$])+?(?=(\$|[ \t]*(//|[\}\],#\n\r])))',
re.DOTALL).setParseAction(unescape_string)
substitution_expr = Regex('\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
string_expr = multiline_string | quoted_string | unquoted_string

value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

include_expr = (Keyword("include", caseless=True).suppress() - (
quoted_string | ((Keyword('url') | Keyword('file')) - Literal('(').suppress() - quoted_string - Literal(')').suppress())))\
quoted_string | ((Keyword('url') | Keyword('file')) - Literal('(').suppress() - quoted_string - Literal(')').suppress()))) \
.setParseAction(include_config)

dict_expr = Forward()
list_expr = Forward()
multi_value_expr = ZeroOrMore((Literal(
'\\') - eol).suppress() | comment | include_expr | substitution_expr | dict_expr | list_expr | value_expr)
# for a dictionary : or = is optional
# last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment | include_expr | assign_expr | eol_comma))
dict_expr = Suppress('{') - inside_dict_expr - Suppress('}')
list_expr = Suppress('[') - ListParser(ZeroOrMore(comment | dict_expr | value_expr | eol_comma)) - Suppress(']')
dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
list_entry = ConcatenatedValueParser(multi_value_expr)
list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

# special case when we have a value assignment where the string can potentially be the remainder of the line
assign_expr << Group(key - (dict_expr | Suppress(Literal('=') | Literal(':')) -
(ConcatenatedValueParser(
ZeroOrMore(substitution_expr | list_expr | dict_expr | comment | value_expr | (Literal('\\') - eol).suppress())
))))
assign_expr << Group(
key - (dict_expr | Suppress(Literal('=') | Literal(':')) - ConcatenatedValueParser(multi_value_expr)))

# the file can be { ... } where {} can be omitted or []
config_expr = ZeroOrMore(comment | eol) \
+ (list_expr | dict_expr | inside_dict_expr) \
+ ZeroOrMore(comment | eol_comma)
config_expr = ZeroOrMore(comment | eol) + (list_expr | dict_expr | inside_dict_expr) + ZeroOrMore(comment | eol_comma)
config = config_expr.parseString(content, parseAll=True)[0]
ConfigParser._resolve_substitutions(config, substitutions)
return config
Expand All @@ -198,10 +204,11 @@ def _resolve_variable(config, substitution):
# default to environment variable
value = os.environ.get(variable)
if value is None:
raise ConfigSubstitutionException("Cannot resolve variable ${{{variable}}} (line: {line}, col: {col})".format(
variable=variable,
line=lineno(substitution.loc, substitution.instring),
col=col(substitution.loc, substitution.instring)))
raise ConfigSubstitutionException(
"Cannot resolve variable ${{{variable}}} (line: {line}, col: {col})".format(
variable=variable,
line=lineno(substitution.loc, substitution.instring),
col=col(substitution.loc, substitution.instring)))
elif isinstance(value, ConfigList) or isinstance(value, ConfigTree):
raise ConfigSubstitutionException(
"Cannot substitute variable ${{{variable}}} because it does not point to a "
Expand All @@ -227,8 +234,8 @@ def _resolve_substitutions(config, substitutions):
config_values = substitution.parent
# if it is a string, then add the extra ws that was present in the original string after the substitution
formatted_resolved_value = \
resolved_value + substitution.ws if isinstance(resolved_value, str) \
and substitution.index < len(config_values.tokens) - 1 else resolved_value
resolved_value + substitution.ws \
if isinstance(resolved_value, str) and substitution.index < len(config_values.tokens) - 1 else resolved_value
config_values.put(substitution.index, formatted_resolved_value)
transformation = config_values.transform()
result = transformation[0] if isinstance(transformation, list) else transformation
Expand Down Expand Up @@ -262,12 +269,14 @@ def postParse(self, instring, loc, token_list):
:param token_list:
:return:
"""
config_list = ConfigList(token_list)
cleaned_token_list = [token for tokens in (token.tokens if isinstance(token, ConfigInclude) else [token]
for token in token_list if token != '')
for token in tokens]
config_list = ConfigList(cleaned_token_list)
return [config_list]


class ConcatenatedValueParser(TokenConverter):

def __init__(self, expr=None):
super(ConcatenatedValueParser, self).__init__(expr)
self.parent = None
Expand All @@ -282,6 +291,7 @@ class ConfigTreeParser(TokenConverter):
"""
Parse a config tree from tokens
"""

def __init__(self, expr=None):
super(ConfigTreeParser, self).__init__(expr)
self.saveAsList = True
Expand All @@ -296,34 +306,28 @@ def postParse(self, instring, loc, token_list):
"""
config_tree = ConfigTree()
for element in token_list:
# from include then merge items
expanded_tokens = element.items() if isinstance(element, ConfigTree) else [element]
expanded_tokens = element.tokens if isinstance(element, ConfigInclude) else [element]

for tokens in expanded_tokens:
# key, value1, value2, ...
# key, value1 (optional), ...
key = tokens[0].strip()
values = tokens[1:]

# empty string
if len(values) == 0:
config_tree.put(key, '')
else:
if isinstance(values[0], list):
# Merge arrays
config_tree.put(key, values[0], False)
for value in values[1:]:
config_tree.put(key, value, True)
value = values[0]
if isinstance(value, list):
config_tree.put(key, value, False)
else:
# Merge dict
for value in values:
if isinstance(value, ConfigList):
conf_value = list(value)
elif isinstance(value, ConfigValues):
conf_value = value
value.parent = config_tree
value.key = key
else:
conf_value = value
config_tree.put(key, conf_value)
if isinstance(value, ConfigValues):
conf_value = value
value.parent = config_tree
value.key = key
else:
conf_value = value
config_tree.put(key, conf_value)

return config_tree
22 changes: 16 additions & 6 deletions pyhocon/config_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,12 @@ def __init__(self, iterable=[]):
for index, value in enumerate(l):
if isinstance(value, ConfigValues):
value.parent = self
value.index = index
value.key = index


class ConfigInclude(object):
    """Wrapper marking tokens that came from an ``include`` directive.

    The parser wraps the content of an included file in this class so that
    the list and tree post-parse steps can recognise it and splice
    ``tokens`` in place of the include statement instead of treating the
    included content as a single nested value.

    :param tokens: the included content, stored as given (no copy, no validation)
    """

    def __init__(self, tokens):
        self.tokens = tokens

    def __repr__(self):
        # Explicit field indexes keep the format string valid on old Python 2 releases.
        return '{0}({1!r})'.format(type(self).__name__, self.tokens)


class ConfigValues(object):
Expand Down Expand Up @@ -260,14 +265,19 @@ def determine_type(token):
# update references for substituted contents
if isinstance(val, ConfigValues):
val.parent = result
val.index = key
val.key = key
result[key] = val

return result
elif first_tok_type is ConfigList:
result = ConfigList()
for token in self.tokens:
result.extend(token)
result = []
for sublist in self.tokens:
sublist_result = ConfigList()
for index, token in enumerate(sublist):
if isinstance(token, ConfigValues):
token.parent = result
token.key = index
sublist_result.append(token)
result.extend(sublist_result)
return [result]
else:
if len(self.tokens) == 1:
Expand Down
140 changes: 135 additions & 5 deletions tests/test_config_parser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import tempfile
from pyparsing import ParseSyntaxException, ParseException
import pytest
from pyhocon import ConfigFactory, ConfigSubstitutionException
Expand Down Expand Up @@ -602,11 +603,58 @@ def test_list_of_dicts(self):
]
"""
)
assert len(config.get('a')) == 2
assert config.get('a')[0].get('a') == 1
assert config.get('a')[0].get('b') == 2
assert config.get('a')[1].get('a') == 3
assert config.get('a')[1].get('c') == 4
assert config['a'] == [
{'a': 1, 'b': 2},
{'a': 3, 'c': 4}
]

def test_list_of_lists(self):
    """Newline-separated sub-lists stay distinct elements of the outer list."""
    hocon_text = """
a: [
[1, 2]
[3, 4]
]
"""
    parsed = ConfigFactory.parse_string(hocon_text)
    expected_rows = [[1, 2], [3, 4]]
    assert parsed['a'] == expected_rows

def test_list_of_dicts_with_merge(self):
    """Dicts and ``${b}`` substitutions placed side by side in a list entry are merged."""
    hocon_text = """
b = {f: 4}
a: [
${b} {a: 1, b: 2},
{a: 3, c: 4} ${b},
{a: 3} ${b} {c: 6},
]
"""
    parsed = ConfigFactory.parse_string(hocon_text)
    # One expected dict per list entry, each carrying the 'f' key pulled in from ${b}.
    expected_entries = [
        {'a': 1, 'b': 2, 'f': 4},
        {'a': 3, 'c': 4, 'f': 4},
        {'a': 3, 'c': 6, 'f': 4},
    ]
    assert parsed['a'] == expected_entries

def test_list_of_lists_with_merge(self):
    """Lists and ``${b}`` substitutions placed side by side in a list entry are concatenated."""
    hocon_text = """
b = [5, 6]
a: [
${b} [1, 2]
[3, 4] ${b}
[1, 2] ${b} [7, 8]
]
"""
    parsed = ConfigFactory.parse_string(hocon_text)
    # Each row is the left-to-right concatenation of the pieces on that line.
    expected_rows = [
        [5, 6, 1, 2],
        [3, 4, 5, 6],
        [1, 2, 5, 6, 7, 8],
    ]
    assert parsed['a'] == expected_rows

def test_invalid_assignment(self):
with pytest.raises(ParseSyntaxException):
Expand Down Expand Up @@ -634,3 +682,85 @@ def test_invalid_dict(self):

with pytest.raises(ParseSyntaxException):
ConfigFactory.parse_string('a = {g}')

def test_include_list(self):
    """An included list file is spliced into the enclosing list where the include appears."""
    include_first = """
a: [
include "{tmp_file}"
3
4
]
"""
    include_last = """
a: [
3
4
include "{tmp_file}"
]
"""
    include_middle = """
a: [
3
include "{tmp_file}"
4
]
"""
    # (template, expected list) pairs, checked in the order listed.
    cases = (
        (include_first, [1, 2, 3, 4]),
        (include_last, [3, 4, 1, 2]),
        (include_middle, [3, 1, 2, 4]),
    )

    with tempfile.NamedTemporaryFile('w') as included:
        included.write('[1, 2]')
        # Flush so the content is on disk before it is read back through the file name.
        included.flush()

        for template, expected in cases:
            parsed = ConfigFactory.parse_string(template.format(tmp_file=included.name))
            assert parsed['a'] == expected

def test_include_dict(self):
    """An included dict file merges into the enclosing dict wherever the include appears."""
    # Doubled braces are literal braces; the templates go through str.format below.
    include_first = """
a: {{
include "{tmp_file}"
c: 3
d: 4
}}
"""
    include_last = """
a: {{
c: 3
d: 4
include "{tmp_file}"
}}
"""
    include_middle = """
a: {{
c: 3
include "{tmp_file}"
d: 4
}}
"""
    merged = {'a': 1, 'b': 2, 'c': 3, 'd': 4}

    with tempfile.NamedTemporaryFile('w') as included:
        included.write('{a: 1, b: 2}')
        # Flush so the content is on disk before it is read back through the file name.
        included.flush()

        for template in (include_first, include_last, include_middle):
            parsed = ConfigFactory.parse_string(template.format(tmp_file=included.name))
            assert parsed['a'] == merged
Loading

0 comments on commit dc208c8

Please sign in to comment.