diff --git a/tools/charts/README.md b/tools/charts/README.md
new file mode 100644
index 0000000000..8feff8bbc2
--- /dev/null
+++ b/tools/charts/README.md
@@ -0,0 +1,18 @@
+# Tool to generate a charts dataset
+
+## Chart Picker
+
+Decides which charts to generate from the topic map.
+
+### Run
+
+```bash
+python3 picker.py
+```
+
+## Dataset Generator
+
+Uses the configs generated above to produce pairs of SVG and encoded CSV files.
+
+TODO: Implement me.
+
diff --git a/tools/charts/picker.py b/tools/charts/picker.py
new file mode 100644
index 0000000000..49fe85bea1
--- /dev/null
+++ b/tools/charts/picker.py
@@ -0,0 +1,368 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+import json
+import os
+from typing import Dict, List
+
+import datacommons as dc
+
+#
+# | 1-P, 1-V | N-P, 1-V | 1-P, N-V | N-P, N-V
+# ----------|------------|------------|------------|-------------
+# Map | | x | |
+# Gauge | x | | |
+# Highlight | x | | |
+# Pie | | | x |
+# Donut | | | x |
+# Line | x | x | x | x
+# Ranking | | x | |
+# V-Bar | x | x | x | x
+# H-Bar | x | x | x | x
+# V-St-Bar | | | x | x
+# H-St-Bar | | | x | x
+# V-Lol | | x | x | x
+# H-Lol | | x | x | x
+# V-St-Lol | | | x | x
+# H-St-Lol | | | x | x
+#
+# Scatter => N-P + 2-V
+#
+
+# Topic cache JSON files read by load_topics(). The paths are relative, so
+# they are resolved against the current working directory.
+_TOPIC_CACHE = '../../server/config/nl_page/topic_cache.json'
+_SDG_TOPIC_CACHE = '../../server/config/nl_page/sdg_topic_cache.json'
+
+# Generated charts dir; main() hands it to Writer, which creates it if needed.
+_OUT_DIR = 'output_charts'
+
+# Text that Writer puts at the start of every shard file.
+# NOTE(review): only blank lines are visible in this literal -- presumably the
+# HTML markup was lost somewhere upstream; confirm against the real file.
+_HTML_OPEN = """
+
+
+
+
+
+
+"""
+
+# Text that Writer puts at the end of a shard file, right before closing it.
+# NOTE(review): same caveat as _HTML_OPEN, only blank lines are visible.
+_HTML_CLOSE = """
+
+
+"""
+
+# Prefix that Writer.add() puts in front of each chart entry.
+_MAIN_SPC = ' '
+# _MAIN_SPC extended by one more string literal; no other code in this view
+# refers to it.
+_SUB_SPC = _MAIN_SPC + ' '
+
+
+def _fname(outdir: str, fno: int) -> str:
+  """Returns the path of shard file number `fno` inside `outdir`."""
+  shard_name = f'shard_{fno}.html'
+  return os.path.join(outdir, shard_name)
+
+
+class Writer:
+  """Writes chart entries into numbered HTML shard files.
+
+  Shard paths come from _fname() (shard_1.html, shard_2.html, ...). Every shard
+  starts with _HTML_OPEN, ends with _HTML_CLOSE and holds at most
+  `charts_per_shard` charts.
+
+  Attributes:
+    next_fno: Number that the next shard file will get.
+    ncharts: Charts written to the currently open shard.
+    total_charts: Charts written across all shards.
+    fp: File object of the current shard, or None when no shard is open.
+    outdir: Directory the shards are written into.
+    charts_per_shard: Maximum number of charts in one shard.
+  """
+
+  def __init__(self, outdir, charts_per_shard: int = 100):
+    """Creates `outdir` if needed and opens the first shard.
+
+    Args:
+      outdir: Directory that receives the shard files.
+      charts_per_shard: Maximum number of charts per shard; must be >= 1.
+
+    Raises:
+      ValueError: If `charts_per_shard` is smaller than 1.
+    """
+    if charts_per_shard < 1:
+      raise ValueError(f'charts_per_shard must be >= 1: {charts_per_shard}')
+    self.next_fno = 1
+    self.ncharts = 0
+    self.total_charts = 0
+    self.fp = None
+    self.charts_per_shard = charts_per_shard
+    os.makedirs(outdir, exist_ok=True)
+    self.outdir = outdir
+    self._new_html()
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, exc_type, exc_value, traceback):
+    self.close()
+
+  def add(self, chart_type: str, params: List[str]):
+    """Appends one chart element to the current shard.
+
+    Args:
+      chart_type: Suffix of the element name, e.g. 'bar' or 'map'.
+      params: Ready-to-emit attribute strings, one per output line.
+    """
+    # Roll over lazily, only when there is a chart to put in the new shard, so
+    # that a run ending exactly on a shard boundary leaves no empty shard.
+    if self.fp is None or self.ncharts >= self.charts_per_shard:
+      self._new_html()
+
+    # NOTE(review): the previous body dropped `chart_type` and `params` and
+    # wrote only whitespace. The element syntax below is reconstructed from the
+    # attribute names the callers pass -- confirm it matches the intended
+    # markup.
+    tag = f'datacommons-{chart_type}'
+    attrs = ''.join(f'{_SUB_SPC}{p}\n' for p in params)
+    self.fp.write(f'{_MAIN_SPC}<{tag}\n{attrs}{_MAIN_SPC}></{tag}>\n')
+
+    self.total_charts += 1
+    self.ncharts += 1
+
+  def close(self):
+    """Finishes and closes the current shard; calling it again is a no-op."""
+    if self.fp is None:
+      return
+    try:
+      self.fp.write(_HTML_CLOSE)
+    finally:
+      # Drop the handle even if the final write fails, so the file is not
+      # leaked and a later close() does not touch a closed file.
+      self.fp.close()
+      self.fp = None
+
+  def _new_html(self):
+    """Closes the current shard, if any, and starts the next one."""
+    self.close()
+    # Titles may contain non-ASCII characters, so do not depend on the
+    # platform's default encoding.
+    self.fp = open(_fname(self.outdir, self.next_fno), 'w', encoding='utf-8')
+    # Write the header for html.
+    self.fp.write(_HTML_OPEN)
+    self.next_fno += 1
+    self.ncharts = 0
+
+
+@dataclass
+class Context:
+ """Inputs shared by the chart helpers while charting one place."""
+ # Place used for single-place charts; also emitted as `parentPlace`.
+ place: str
+ # Value emitted as `childPlaceType`.
+ child_type: str
+ # Places emitted in the `places` attribute of multi-place charts.
+ child_places: List[str]
+ # Sink that receives every generated chart.
+ writer: Writer
+ # Maps a topic or variable id to {'n': name} plus, for topics, 'v': members.
+ topic_map: Dict[str, Dict]
+
+
+def _composite(v: str) -> bool:
+  """Returns True if `v` is a topic or SVPG id rather than a plain variable."""
+  composite_prefixes = ('dc/topic/', 'dc/svpg/')
+  return v.startswith(composite_prefixes)
+
+
+#
+# Helper functions to generate the chart configs.
+#
+def _gauge(v: str, title: str, ctx: Context):
+  """Emits a gauge of variable `v` at ctx.place with min 0 and max 100."""
+  attrs = [f'header="{title}"', f'place="{ctx.place}"', f'variable="{v}"']
+  attrs += ['min="0"', 'max="100"']
+  ctx.writer.add('gauge', attrs)
+
+
+def _highlight(v: str, title: str, ctx: Context):
+  """Emits a highlight chart of variable `v` at ctx.place.
+
+  Args:
+    v: Id of the variable to show.
+    title: Text for the `header` attribute.
+    ctx: Supplies the place and the writer that receives the chart.
+  """
+  parts = [f'header="{title}"', f'place="{ctx.place}"', f'variable="{v}"']
+  # The chart type was misspelled 'higlight', which does not name the
+  # highlight chart.
+  ctx.writer.add('highlight', parts)
+
+
+def _map(v: str, title: str, ctx: Context):
+  """Emits a map of variable `v` over the child places of ctx.place."""
+  header = f'header="{title}"'
+  parent = f'parentPlace="{ctx.place}"'
+  child_type = f'childPlaceType="{ctx.child_type}"'
+  variable = f'variable="{v}"'
+  ctx.writer.add('map', [header, parent, child_type, variable])
+
+
+def _ranking(v: str, title: str, ctx: Context):
+  """Emits two rankings of `v` over child places: default and `showLowest`."""
+  common = [
+      f'header="{title}"',
+      f'parentPlace="{ctx.place}"',
+      f'childPlaceType="{ctx.child_type}"',
+      f'variable="{v}"',
+  ]
+  # Same chart twice; the second one additionally carries `showLowest`.
+  for extra in (['rankingCount=10'], ['showLowest', 'rankingCount=10']):
+    ctx.writer.add('ranking', common + extra)
+
+
+def _pie(vars: List[str], title: str, ctx: Context, donut: bool = False):
+  """Emits a pie chart of `vars` at ctx.place, with `donut` set if requested."""
+  joined = ' '.join(vars)
+  shape_flag = ['donut'] if donut else []
+  attrs = [f'header="{title}"', f'place="{ctx.place}"', f'variables="{joined}"']
+  ctx.writer.add('pie', attrs + shape_flag)
+
+
+def _line(vars: List[str], title: str, ctx: Context):
+  """Emits three line charts of the first five `vars`.
+
+  One is for ctx.place, one for ctx.child_places, and one for up to 10 child
+  places of ctx.place.
+  """
+  shown_vars = ' '.join(vars[:5])
+  header = f'header="{title}"'
+  variables = f'variables="{shown_vars}"'
+
+  # Single place.
+  ctx.writer.add('line', [header, f'place="{ctx.place}"', variables])
+
+  # Explicit list of places.
+  joined_places = ' '.join(ctx.child_places)
+  ctx.writer.add('line', [header, f'places="{joined_places}"', variables])
+
+  # Children of the place, by type.
+  ctx.writer.add('line', [
+      header,
+      f'parentPlace="{ctx.place}"',
+      f'childPlaceType="{ctx.child_type}"',
+      variables,
+      'maxPlaces="10"',
+  ])
+
+
+def _bar(vars: List[str],
+         title: str,
+         ctx: Context,
+         stacked: bool = False,
+         lollipop: bool = False,
+         horizontal: bool = False):
+  """Emits three bar charts of the first five `vars`.
+
+  One is for ctx.place, one for ctx.child_places sorted ascending, and one for
+  up to 10 child places of ctx.place sorted descending. Each enabled option
+  (`stacked`, `lollipop`, `horizontal`) is appended as a bare attribute, in
+  that order.
+  """
+  flags = [
+      name for enabled, name in (
+          (stacked, 'stacked'),
+          (lollipop, 'lollipop'),
+          (horizontal, 'horizontal'),
+      ) if enabled
+  ]
+  header = f'header="{title}"'
+  shown_vars = ' '.join(vars[:5])
+  variables = f'variables="{shown_vars}"'
+
+  # Single place.
+  ctx.writer.add('bar', [header, f'place="{ctx.place}"', variables] + flags)
+
+  # Explicit list of places.
+  joined_places = ' '.join(ctx.child_places)
+  by_places = [
+      header, f'places="{joined_places}"', variables, 'sort="ascending"'
+  ]
+  ctx.writer.add('bar', by_places + flags)
+
+  # Children of the place, by type.
+  by_child_type = [
+      header,
+      f'parentPlace="{ctx.place}"',
+      f'childPlaceType="{ctx.child_type}"',
+      variables,
+      'maxPlaces="10"',
+      'sort="descending"',
+  ]
+  ctx.writer.add('bar', by_child_type + flags)
+
+
+#
+# Overall logic to pick charts.
+#
+def pick_charts(vars: List[str], title: str, ctx: Context):
+  """Emits every chart kind that applies to `vars`.
+
+  Each variable gets a gauge, highlight, map and ranking titled with its own
+  name from ctx.topic_map ('' when unknown). The whole list, titled `title`,
+  gets line charts and plain/lollipop bars in both orientations. With more
+  than one variable it also gets pie/donut charts and stacked bars.
+  """
+  per_variable_charts = (_gauge, _highlight, _map, _ranking)
+  for v in vars:
+    vname = ctx.topic_map.get(v, {}).get('n', '')
+    for emit in per_variable_charts:
+      emit(v, vname, ctx)
+
+  _line(vars, title, ctx)
+
+  for lollipop in (False, True):
+    for horizontal in (False, True):
+      _bar(vars, title, ctx, lollipop=lollipop, horizontal=horizontal)
+
+  if len(vars) <= 1:
+    return
+
+  # The remaining charts need several variables.
+  for donut in (False, True):
+    _pie(vars, title, ctx, donut=donut)
+  for lollipop in (False, True):
+    for horizontal in (False, True):
+      _bar(vars,
+           title,
+           ctx,
+           stacked=True,
+           lollipop=lollipop,
+           horizontal=horizontal)
+
+
+def pick(topic: str, ctx: Context):
+  """Picks charts for the non-composite members of `topic`, if it has any."""
+  members = ctx.topic_map.get(topic, {}).get('v', [])
+  plain_vars = [m for m in members if not _composite(m)]
+  if not plain_vars:
+    return
+  pick_charts(plain_vars, ctx.topic_map[topic]['n'], ctx)
+
+
+def load_topics(topic_cache_file: str) -> Dict[str, Dict]:
+  """Loads a topic cache file into a map keyed by topic and variable id.
+
+  Args:
+    topic_cache_file: Path of a JSON file with a top-level 'nodes' list. Each
+      node has 'dcid' and 'name' lists and usually 'relevantVariableList' or
+      'memberList'.
+
+  Returns:
+    A dict with one entry {'n': name, 'v': members} per node, plus one entry
+    {'n': name} per non-composite member for which a name was found.
+
+  Raises:
+    ValueError: If a member variable has the same id as a node.
+  """
+  # JSON is UTF-8; do not depend on the platform's default encoding.
+  with open(topic_cache_file, 'r', encoding='utf-8') as fp:
+    cache = json.load(fp)
+
+  all_vars = set()
+  out_map = {}
+  for node in cache['nodes']:
+    dcid = node['dcid'][0]
+    name = node['name'][0]
+    if 'relevantVariableList' in node:
+      members = node['relevantVariableList']
+    else:
+      # A node carrying neither list is treated as having no members.
+      members = node.get('memberList', [])
+    out_map[dcid] = {'n': name, 'v': members}
+    all_vars.update(m for m in members if not _composite(m))
+
+  if not all_vars:
+    # Nothing to look up, so skip the API call.
+    return out_map
+
+  # Get all the names of variables.
+  names_by_var = dc.get_property_values(sorted(all_vars), 'name')
+  for var_id, names in names_by_var.items():
+    # Raise rather than assert: the check must also run under `python -O`.
+    if var_id in out_map:
+      raise ValueError(f'Variable id clashes with a topic id: {var_id}')
+    if names:
+      out_map[var_id] = {'n': names[0]}
+
+  return out_map
+
+
+def _pick_all(topic_map, places, writer) -> int:
+  """Picks charts for every composite id in `topic_map` at every place.
+
+  Args:
+    topic_map: Map produced by load_topics().
+    places: (place, child place type, child places) triples.
+    writer: Writer that receives the charts.
+
+  Returns:
+    Number of (place, topic) pairs processed.
+  """
+  ntopics = 0
+  for pl, ct, cpl in places:
+    for t in sorted(topic_map):
+      if not _composite(t):
+        continue
+      ntopics += 1
+      pick(
+          t,
+          Context(place=pl,
+                  child_type=ct,
+                  child_places=cpl,
+                  writer=writer,
+                  topic_map=topic_map))
+  return ntopics
+
+
+def main():
+  """Generates chart shards for the regular and the SDG topic caches.
+
+  The regular cache is charted for the USA (states), California (counties)
+  and Earth (countries); the SDG cache only for Earth. A summary line is
+  printed at the end.
+  """
+  usa = ('country/USA', 'State',
+         ['geoId/06', 'geoId/36', 'geoId/08', 'geoId/48', 'geoId/27'])
+  california = ('geoId/06', 'County',
+                ['geoId/06085', 'geoId/06061', 'geoId/06029', 'geoId/06025'])
+  earth = ('Earth', 'Country', [
+      'country/USA', 'country/IND', 'country/IRN', 'country/NGA',
+      'country/BRA'
+  ])
+  runs = [
+      (_TOPIC_CACHE, [usa, california, earth]),
+      (_SDG_TOPIC_CACHE, [earth]),
+  ]
+
+  writer = Writer(_OUT_DIR)
+  ntopics = 0
+  try:
+    for cache_file, places in runs:
+      ntopics += _pick_all(load_topics(cache_file), places, writer)
+  finally:
+    # Finish the open shard even when loading or picking fails, so the file
+    # handle is not leaked.
+    writer.close()
+  print(
+      f'Processed {ntopics} topics and produced {writer.total_charts} charts!')
+
+
+# Run the picker only when this file is executed as a script.
+if __name__ == "__main__":
+ main()