-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmanage.py
executable file
·1148 lines (958 loc) · 48.4 KB
/
manage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
import csv
import json
import re
import sys
import warnings
from collections import OrderedDict, defaultdict
from copy import deepcopy
from io import StringIO
from operator import itemgetter
from pathlib import Path
import click
import json_merge_patch
import mdformat
import requests
import yaml
from docutils import nodes
from jsonschema import FormatChecker
from jsonschema.validators import Draft4Validator
from ocdsextensionregistry import ProfileBuilder
from ocdskit.mapping_sheet import mapping_sheet
from ocdskit.schema import add_validation_properties
basedir = Path(__file__).resolve().parent
referencedir = basedir / "docs" / "reference"
class Dumper(yaml.SafeDumper):
    """A ``yaml.SafeDumper`` subclass whose ``ignore_aliases`` hook always returns ``True``."""

    def ignore_aliases(self, _data):
        """Return ``True`` for every value, whatever ``_data`` is (the argument is not inspected)."""
        return True
def str_representer(dumper, data):
    """
    Represent the string ``data`` as a YAML scalar using ``dumper``.

    Strings that contain a newline are written in the literal style (``|``), to reduce quoting. Other strings use the
    dumper's default style.
    """
    is_multiline = "\n" in data
    style = "|" if is_multiline else None
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)
# Register str_representer on Dumper as the representer for str values.
Dumper.add_representer(str, str_representer)
def get(url):
    """
    Send a GET request to ``url`` (with a 10-second timeout) and return the response.

    Calls ``raise_for_status()`` on the response before returning it, so an unsuccessful status code raises.
    """
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    return resp
def csv_reader(url):
    """Fetch the CSV at ``url`` with ``get`` and return a ``csv.DictReader`` over the response text."""
    text = get(url).text
    buffer = StringIO(text)
    return csv.DictReader(buffer)
def write_yaml_file(filename, data):
    """Write ``data`` to ``filename`` as YAML, using ``Dumper``."""
    # Make it easier to see indentation (indent=4). Avoid line wrapping (width=1000).
    # sort_keys is True by default, so it is turned off explicitly.
    options = {"Dumper": Dumper, "indent": 4, "width": 1000, "sort_keys": False}
    with open(filename, "w") as stream:
        yaml.dump(data, stream, **options)
# From standard-maintenance-scripts/tests/test_readme.py
def set_additional_properties(data, additional_properties):
    """
    Recursively set "additionalProperties" to ``additional_properties`` on every mapping that has a "properties" key.

    ``data`` is modified in place. Lists and the values of mappings are descended into; other values are ignored.
    """
    if isinstance(data, dict):
        if "properties" in data:
            data["additionalProperties"] = additional_properties
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        return

    for child in children:
        set_additional_properties(child, additional_properties)
def traverse(schema_action=None, object_action=None):
    """
    Return a function that walks through a schema, performing actions.

    The returned function accepts a schema and, optionally, a JSON pointer prefix (``""`` by default). It calls
    ``schema_action(schema, pointer)`` on the schema itself, on each property whose type includes "object", and on
    each entry under "definitions". It calls ``object_action(value)`` on each property and on the "items" of each
    property whose type includes "array".

    :param schema_action: A callable accepting a schema and its pointer. If omitted, nothing is done.
    :param object_action: A callable accepting a property's (or its items') schema. If omitted, nothing is done.
    :raises NotImplementedError: if an array's items are objects, or are arrays (other than at
      /Location/geometry/coordinates)
    """

    def _noop(*_args):
        pass

    # Both actions are optional. Substitute a no-op, so that the walker can call them unconditionally.
    if schema_action is None:
        schema_action = _noop
    if object_action is None:
        object_action = _noop

    def _coerce_to_list(data, key):
        """
        Return the value of the ``key`` key in the ``data`` mapping. If the value is a string, wraps it in an array.
        """
        item = data.get(key, [])
        if isinstance(item, str):
            return [item]
        return item

    def _method(schema, pointer=""):
        schema_action(schema, pointer)

        if "properties" in schema:
            for key, value in schema["properties"].items():
                new_pointer = f"{pointer}/{key}"
                prop_type = _coerce_to_list(value, "type")

                object_action(value)

                if "object" in prop_type:
                    _method(value, pointer=new_pointer)
                elif "array" in prop_type:
                    items_type = _coerce_to_list(value["items"], "type")

                    object_action(value["items"])

                    # Recursing into arrays of arrays or arrays of objects hasn't been implemented.
                    if "object" in items_type or (
                        "array" in items_type and new_pointer != "/Location/geometry/coordinates"
                    ):
                        raise NotImplementedError(f"{new_pointer}/items has unexpected type {items_type}")
        # /Observation/dimensions is the one schema that is allowed to have no "properties" key.
        elif pointer != "/Observation/dimensions":
            warnings.warn(f'Missing "properties" key at {pointer}', stacklevel=2)

        for key, value in schema.get("definitions", {}).items():
            _method(value, pointer=f"{pointer}/{key}")

    return _method
@click.group()
def cli():
    # The click command group. Commands in this file are attached to it with @cli.command().
    pass
@cli.command()
def pre_commit():
    """Update docs/reference/schema.md and _static/i18n.csv."""

    def _get_definition_references(schema, defn, parents=None, project_schema=None, *, include_nested=True):
        """
        Recursively generate a list of JSON pointers that reference a definition in JSON schema.

        Each pointer is a list of path components, in which "0" stands for an array index.

        :param schema: The JSON schema
        :param defn: The name of the definition
        :param parents: A list of the parents of schema
        :param project_schema: The full project schema
        :param include_nested: Whether to include nested references
        """
        references = []

        if parents is None:
            parents = []
        if project_schema is None:
            project_schema = schema

        if "properties" in schema:
            for key, value in schema["properties"].items():
                # An array whose items are a reference.
                if value.get("type") in ["array", ["array"]] and "$ref" in value["items"]:
                    if value["items"]["$ref"] == f"#/definitions/{defn}":
                        references.append([*parents, key, "0"])
                    elif include_nested:
                        # Follow the reference to another definition, to find references nested within it.
                        references.extend(
                            _get_definition_references(
                                project_schema["definitions"][value["items"]["$ref"].split("/")[-1]],
                                defn,
                                [*parents, key, "0"],
                                project_schema,
                                include_nested=include_nested,
                            )
                        )
                # A direct reference.
                elif "$ref" in value:
                    if value["$ref"] == f"#/definitions/{defn}":
                        references.append([*parents, key])
                    elif include_nested:
                        references.extend(
                            _get_definition_references(
                                project_schema["definitions"][value["$ref"].split("/")[-1]],
                                defn,
                                [*parents, key],
                                project_schema,
                                include_nested=include_nested,
                            )
                        )
                # An inline object.
                elif "properties" in value:
                    references.extend(
                        _get_definition_references(
                            value, defn, [*parents, key], project_schema, include_nested=include_nested
                        )
                    )

        if "definitions" in schema:
            for key, value in schema["definitions"].items():
                references.extend(
                    _get_definition_references(value, defn, [key], project_schema, include_nested=include_nested)
                )

        return references

    def _update_sub_schema_reference(schema):
        """
        Update docs/reference/schema.md.

        Adds "content" and "references" keys to each definition in ``schema`` as a side effect.
        """
        with (referencedir / "schema.md").open() as f:
            schema_reference = f.readlines()

        # Preserve content that appears before the generated reference content for each sub-schema
        sub_schema_index = schema_reference.index("## Sub-schemas\n") + 3

        for i in range(sub_schema_index, len(schema_reference)):
            if schema_reference[i][:4] == "### ":
                defn = schema_reference[i][4:-1]

                # Drop definitions that don't appear in the schema
                if defn in schema["definitions"]:
                    schema["definitions"][defn]["content"] = []
                    j = i + 1

                    while j < len(schema_reference) and not schema_reference[j].startswith(f"`{defn}` is defined as:"):
                        schema["definitions"][defn]["content"].append(schema_reference[j])
                        j = j + 1

        # Preserve introductory content up to and including the sentence below the ## Sub-schema heading
        schema_reference = schema_reference[:sub_schema_index]

        # Generate standard reference content for each definition
        for defn, definition in schema["definitions"].items():
            definition["content"] = definition.get("content", [])

            # Add heading
            definition["content"].insert(0, f"\n### {defn}\n")

            # Add description
            if definition["content"][-1] != "\n":
                definition["content"].append("\n")

            definition["content"].extend(
                [
                    f"`{defn}` is defined as:\n\n",
                    f"```{{field-description}} ../../build/current_lang/project-schema.json /definitions/{defn}\n",
                    "```\n\n",
                ]
            )

            # Add a list of properties that reference this definition
            definition["references"] = _get_definition_references(schema, defn, include_nested=False)
            definition["content"].append("This sub-schema is referenced by the following properties:\n")

            for ref_with_indexes in definition["references"]:
                # Remove array indexes because they aren't in the HTML anchors generated by the jsonschema directive
                ref = [part for part in ref_with_indexes if part != "0"]

                url = "project-schema.json,"

                # A reference that starts with a definition's name puts that definition's pointer in the URL.
                if ref[0] in schema["definitions"]:
                    url += f"/definitions/{ref[0]},{'/'.join(ref[1:])}"
                else:
                    url += f",{'/'.join(ref)}"

                definition["content"].append(f"* [`{'/'.join(ref)}`]({url})\n")

            # Add schema table
            properties_to_collapse = []
            for key, value in definition["properties"].items():
                if value.get("type") not in ["object", ["object"]]:
                    properties_to_collapse.append(key)

            definition["content"].extend(
                [
                    f"\nEach `{defn}` has the following fields:\n\n",
                    "`````{tab-set}\n\n",
                    "````{tab-item} Schema\n\n",
                    "```{jsonschema} ../../build/current_lang/project-schema.json\n",
                    f":pointer: /definitions/{defn}\n",
                    f":collapse: {','.join(properties_to_collapse)}\n:addtargets:\n```\n\n",
                    "````\n\n",
                    "````{tab-item} Examples\n\n",
                ]
            )

            # Paths that don't appear in the example data at all
            paths_to_skip = [
                "forecasts/0/observations/0/value",
                "metrics/0/observations/0/value",
                "parties/0/beneficialOwners/0",
                "parties/0/people/0/address",
                "parties/0/people/0/identifier",
                "contractingProcesses/0/summary/finance",
                "contractingProcesses/1/summary/finance",
                "contractingProcesses/2/summary/finance",
                "social/consultationMeetings/0/publicOffice",
            ]

            # Add examples
            definition["references"] = _get_definition_references(schema, defn)
            for ref in definition["references"]:
                # Skip references within definitions, and paths that start with a path to skip.
                if ref[0] not in schema["definitions"] and not any(
                    "/".join(ref).startswith(p) for p in paths_to_skip
                ):
                    # Point to the whole array, rather than to its first entry.
                    if ref[-1] == "0":
                        ref.pop(-1)

                    definition["content"].extend(
                        [
                            "```{jsoninclude} ../../docs/examples/example.json\n",
                            f":jsonpointer: /projects/0/{'/'.join(ref)}\n",
                            f":title: {'/'.join(ref)}\n",
                            "```\n\n",
                        ]
                    )

            definition["content"].extend(["````\n\n", "`````\n"])

            schema_reference.extend(definition["content"])

        # Paths that don't appear in the example data, but for which there is an alternative
        paths_to_replace = {
            "/projects/0/contractingProcesses/0/summary/modifications/0/oldContractValue": (
                "/projects/0/contractingProcesses/0/summary/modifications/2/oldContractValue"
            ),
            "/projects/0/contractingProcesses/0/summary/modifications/0/newContractValue": (
                "/projects/0/contractingProcesses/0/summary/modifications/2/newContractValue"
            ),
        }

        for key, value in paths_to_replace.items():
            # Raises ValueError if the path to replace wasn't generated.
            index = schema_reference.index(f":jsonpointer: {key}\n")
            schema_reference[index] = f":jsonpointer: {value}\n"

        with (referencedir / "schema.md").open("w") as f:
            f.writelines(schema_reference)

    with (basedir / "schema" / "project-level" / "project-schema.json").open() as f:
        schema = json.load(f)

    # Update schema reference documentation
    _update_sub_schema_reference(schema)

    # Generate a CSV file of fields that can contain non-English text.
    _, rows = mapping_sheet(schema, include_codelist=True, include_deprecated=False)

    with (basedir / "docs" / "_static" / "i18n.csv").open("w") as f:
        fieldnames = ["path", "title", "translatable", "notes"]

        # The rows have more keys than the CSV has columns: ignore the extra keys.
        writer = csv.DictWriter(f, fieldnames=fieldnames, lineterminator="\n", extrasaction="ignore")
        writer.writeheader()

        for row in rows:
            # A field is translatable if it is a string that is restricted by neither an enum nor a codelist.
            row["translatable"] = row["type"] == "string" and not row["values"] and not row["codelist"]
            if row["path"] in {"id", "contractingProcesses/id", "contractingProcesses/summary/ocid"}:
                row["notes"] = "Only the part of the identifier following the prefix can be internationalized."
            # Both forecasts and metrics have an "observations" array (see paths_to_skip above).
            elif row["path"] in {"forecasts/observations/measure", "metrics/observations/measure"}:
                row["notes"] = "Only string measures can be internationalized."
            writer.writerow(row)
# https://raw.githubusercontent.com/open-contracting-extensions/public-private-partnerships/1.0-dev/schema/patched/
@cli.command()
@click.option(
    "--ppp-base-url", default="https://standard.open-contracting.org/profiles/ppp/latest/en/_static/patched/"
)
def update(ppp_base_url):
    """
    Align OC4IDS with OCDS.

    It uses OCDS for PPPs as a basis, because it includes most definitions and codelists needed in OC4IDS. It copies
    definitions, properties and codelists across, making modifications as required.

    Run this command for every release of OCDS for PPPs, review any changes to schemas or codelists, and update the
    command as needed.

    Some OC4IDS-specific definitions have fields with the same names as in OCDS-specific definitions, notably:

    - procurementMethod
    - procurementMethodDetails
    - tenderers

    The descriptions of most other such fields have diverged. As such, the command makes no effort to copy the
    descriptions of such fields, and instead leaves this up to the editor.
    """

    def _edit_code(row, oc4ids_codes, source):
        """
        If the row's Code is in the ``oc4ids_codes`` list, add " or project" after "contracting process" in the row's
        Description and sets the row's Source to ``"OC4IDS"``. Otherwise, set the row's Source to ``source``.

        The row is modified in place, and is also returned.
        """
        if row["Code"] in oc4ids_codes:
            row["Description"] = re.sub(r"(?<=contracting process\b)", " or project", row["Description"])
            row["Description"] = re.sub(r"(?<=contracting processes\b)", " or projects", row["Description"])
            row["Source"] = "OC4IDS"
        else:
            row["Source"] = source
        return row

    def _merge_codes(sources, fieldnames, basename, ignore, oc4ids_codes, oc4ids_rows):
        """
        Return the text of a CSV codelist that merges the ``basename`` codelists of the ``sources`` with the
        pre-existing OC4IDS rows.

        :param sources: The sources to read, in order of precedence ("OCDS" is read from ``ocds_base_url``, any other
          source from ``ppp_base_url``)
        :param fieldnames: The CSV's column names
        :param basename: The codelist's filename
        :param ignore: The codes to omit
        :param oc4ids_codes: The codes whose rows are edited by ``_edit_code``
        :param oc4ids_rows: The pre-existing OC4IDS rows, added if their codes aren't in any source
        """
        io = StringIO()

        writer = csv.DictWriter(io, fieldnames, lineterminator="\n", extrasaction="ignore")
        writer.writeheader()

        # Only used for membership tests. The first source in which a code appears wins.
        seen = set()
        for source in sources:
            for row in csv_reader(f"{ocds_base_url if source == 'OCDS' else ppp_base_url}codelists/{basename}"):
                if row["Code"] not in seen and row["Code"] not in ignore:
                    seen.add(row["Code"])
                    _edit_code(row, oc4ids_codes, source)
                    writer.writerow(row)

        # Add pre-existing codes from OC4IDS.
        writer.writerows(row for row in oc4ids_rows if row["Code"] not in seen)

        return io.getvalue()

    def _copy_element(name, replacements=None, root="definitions"):
        """
        Copy definitions or properties from the OCDS for PPPs schema to the OC4IDS schema.

        :param name: The name of the definition or property to copy
        :param replacements: A dict whose keys are tuples containing the path of the field in which the replacement is
          to be performed and whose values are a function to perform the replacements
        :param str root: "definitions" or "properties"
        """
        element = deepcopy(ppp_schema[root][name])
        schema[root][name] = element

        if replacements:
            for keys, replacement in replacements.items():
                *parent_keys, leaf = keys
                # Resolve every path from the copied element, not from wherever the previous path ended.
                target = element
                for key in parent_keys:
                    target = target[key]
                target[leaf] = replacement(target[leaf])

    # Similar in structure to `add_versioned` in the standard's `make_versioned_release_schema.py`.
    def _remove_null_and_pattern_properties(*args):
        """Remove the "patternProperties" key, ``"null"`` from the "type" key, and ``None`` from the "enum" key."""

        def __schema_action(schema, _pointer):
            schema.pop("patternProperties", None)

        def __object_action(value):
            if "type" in value and isinstance(value["type"], list) and "null" in value["type"]:
                value["type"].remove("null")
            if "enum" in value and None in value["enum"]:
                value["enum"].remove(None)

        traverse(__schema_action, __object_action)(*args)

    def _remove_deprecated_properties(*args):
        """Remove "deprecated" properties."""

        def __schema_action(schema, pointer):
            if "properties" in schema:
                # Iterate over a copy of the keys, because properties are deleted during iteration.
                for key in list(schema["properties"]):
                    if "deprecated" in schema["properties"][key]:
                        del schema["properties"][key]
            elif pointer != "/Observation/dimensions":
                warnings.warn(f'Missing "properties" key at {pointer}', stacklevel=2)

        traverse(__schema_action)(*args)

    def _remove_integer_identifier_types(*args):
        """Set all ``id`` fields to allow only strings, not integers."""

        def __schema_action(schema, pointer):
            if "properties" in schema:
                if "id" in schema["properties"]:
                    schema["properties"]["id"]["type"] = "string"
            elif pointer != "/Observation/dimensions":
                warnings.warn(f'Missing "properties" key at {pointer}', stacklevel=2)

        traverse(__schema_action)(*args)

    def _compare(actual, infra_list, ocds_list, prefix, suffix):
        """
        Exit if ``infra_list`` contains values not in ``actual``, or if ``actual`` contains values not in ``ocds_list``
        or ``infra_list``. This ensures editors update this script when codelists or definitions are added to OC4IDS.

        ``infra_list`` and ``ocds_list`` are sets. The names in the exit message are sorted, so that it is stable.
        """
        actual = set(actual)

        # An editor might've added an infrastructure codelist or copied an OCDS codelist, without updating this script.
        added = actual - infra_list - ocds_list
        if added:
            sys.exit(f'{prefix} has unexpected {", ".join(sorted(added))}: add to infra_{suffix} or ocds_{suffix}?')

        # An editor might've removed an infrastructure codelist, without updating this script.
        removed = infra_list - actual
        if removed:
            sys.exit(f'{prefix} is missing {", ".join(sorted(removed))}: remove from infra_{suffix}?')

    ocds_base_url = "https://standard.open-contracting.org/1.1/en/"

    builder = ProfileBuilder(
        "1__1__5",
        {
            "budget": "master",
            "transaction_milestones": "master",
            "beneficialOwners": "master",
            "organizationClassification": "1.1",
        },
    )
    response = get(f"{ppp_base_url}release-schema.json")
    # Fill in the template variables in the OCDS for PPPs schema before parsing it.
    ppp_schema = json.loads(response.text.replace("{{version}}", "1.1").replace("{{lang}}", "en"))
    ppp_schema = builder.patched_release_schema(schema=ppp_schema)

    schema_dir = basedir / "schema" / "project-level"
    codelists_dir = schema_dir / "codelists"

    with (schema_dir / "project-schema.json").open() as f:
        schema = json.load(f)

    infra_codelists = {
        "classificationScheme.csv",
        "climateMeasures.csv",
        "climateOversightTypes.csv",
        "conservationMeasure.csv",
        "constructionMaterial.csv",
        "contractingProcessStatus.csv",
        "contractNature.csv",
        "costCategory.csv",
        "country.csv",  # move to ocds_codelists for OCDS 1.2
        "environmentalGoal.csv",
        "laborObligations.csv",
        "metricID.csv",
        "modificationType.csv",
        "policyAlignment.csv",
        "projectSector.csv",
        "projectStatus.csv",
        "projectType.csv",
        "relatedProject.csv",
        "relatedProjectScheme.csv",
        "sustainabilityStrategy.csv",
    }
    ocds_codelists = {
        "currency.csv",
        "documentType.csv",
        "geometryType.csv",
        "locationGazetteers.csv",
        "method.csv",
        "partyRole.csv",
        "releaseTag.csv",
        "unitClassificationScheme.csv",
        "milestoneType.csv",
        "milestoneStatus.csv",
        "milestoneCode.csv",
        "assetClass.csv",
        "debtRepaymentPriority.csv",
        "financingArrangementType.csv",
        "financingPartyType.csv",
    }
    _compare(
        [path.name for path in codelists_dir.iterdir()],
        infra_codelists,
        ocds_codelists,
        "schema/project-level/codelists",
        "codelists",
    )

    infra_definitions = {
        "Beneficiary",
        "Benefit",
        "BudgetBreakdowns",
        "ClimateMeasure",
        "ConservationMeasure",
        "ContractingProcess",
        "ContractingProcessSummary",  # Similar to individual release in OCDS
        "Cost",
        "CostGroup",
        "CostMeasurement",
        "HealthAndSafety",
        "LaborObligations",
        "LinkedRelease",  # Similar to linked release in OCDS
        "Meeting",
        "Modification",
        "PublicOffice",
        "RelatedProject",  # Similar to relatedProcess in OCDS
        "SimpleIdentifier",
        "Social",
        "Sustainability",
    }
    ocds_definitions = {
        "Period",
        "Classification",
        "Location",
        "Value",
        "Organization",
        "OrganizationReference",
        "Address",
        "ContactPoint",
        "BudgetBreakdown",
        "Document",
        "Identifier",
        "Metric",
        "Observation",
        "Transaction",
        "Milestone",
        "MilestoneReference",
        "Person",
        "Finance",
    }
    _compare(
        schema["definitions"],
        infra_definitions,
        ocds_definitions,
        "schema/project-level/project-schema.json#/definitions",
        "definitions",
    )

    # Originally from https://docs.google.com/spreadsheets/d/1ttXgMmmLvqBlPRi_4jAJhIobjnCiwMv13YwGfFOnoJk/edit#gid=0
    ignore = {
        # https://github.com/open-contracting/infrastructure/issues/269
        "finalAudit",
        # https://github.com/open-contracting/standard/issues/870
        "contractSchedule",
        # PPP-specific code or description
        "needsAssessment",
        "projectAdditionality",
        "financeAdditionality",
        "pppModeRationale",
        "riskComparison",
        "discountRate",
        "equityTransferCaps",
        "financeArrangements",
        "guaranteeReports",
        "grants",
        "servicePayments",
        "landTransfer",
        "assetTransfer",
        "revenueShare",
        "otherGovernmentSupport",
        "tariffMethod",
        "tariffReview",
        "tariffs",
        "tariffIllustration",
        "handover",
        "financialStatement",
    }

    # Copy the OCDS codelists.
    for basename in ocds_codelists:
        path = schema_dir / "codelists" / basename

        if basename in {"documentType.csv", "locationGazetteers.csv", "partyRole.csv"}:
            # Collect the pre-existing OC4IDS rows before the file is overwritten.
            with open(path) as f:
                reader = csv.DictReader(f)
                fieldnames = reader.fieldnames
                oc4ids_rows = []
                oc4ids_codes = []
                for row in reader:
                    if row.get("Source", row.get("Extension")) == "OC4IDS":
                        oc4ids_rows.append(row)
                        oc4ids_codes.append(row["Code"])

        # Build the new content before opening the file for writing, so that a failed request doesn't leave the
        # codelist file empty.
        if basename == "documentType.csv":
            text = _merge_codes(["OCDS for PPPs", "OCDS"], fieldnames, basename, ignore, oc4ids_codes, oc4ids_rows)
        elif basename == "locationGazetteers.csv":
            text = _merge_codes(["OCDS for PPPs"], fieldnames, basename, [], oc4ids_codes, oc4ids_rows)
        elif basename == "partyRole.csv":
            text = _merge_codes(["OCDS"], fieldnames, basename, [], oc4ids_codes, oc4ids_rows)
        else:
            text = get(f"{ppp_base_url}codelists/{basename}").text

        with open(path, "w") as f:
            f.write(text)

    # Copy properties
    _copy_element(
        "language",
        {
            ("title",): lambda s: s.replace("Release language", "Language"),
        },
        root="properties",
    )

    # Copy definitions. The following definitions follow the same order as in project-schema.json.

    _copy_element(
        "Period",
        {
            # Refer to project.
            ("description",): lambda s: s.replace("contracting process", "project or contracting process"),
        },
    )

    _copy_element(
        "Classification",
        {
            # Replace line item classification scheme codelist with classification scheme codelist
            ("properties", "scheme", "description"): lambda s: s.replace(
                ". For line item classifications, this uses the open [itemClassificationScheme](https://standard.open-contracting.org/1.1/en/schema/codelists/#item-classification-scheme) codelist.",  # noqa: E501
                ", using the open [classificationScheme](https://standard.open-contracting.org/infrastructure/{{version}}/{{lang}}/reference/codelists/#classificationscheme) codelist.",  # noqa: E501
            ),
        },
    )
    # Replace line item classification scheme codelist with classification scheme codelist
    schema["definitions"]["Classification"]["properties"]["scheme"]["codelist"] = "classificationScheme.csv"

    _copy_element("Location")
    # Original from extension: "The location where activity related to this tender, contract or license will be delivered, or will take place. A location can be described by either a geometry (point location, line or polygon), or a gazetteer entry, or both."  # noqa: E501
    schema["definitions"]["Location"]["description"] = (
        "The location where activity related to this project will be delivered, or will take place. A location may be described using a geometry (point location, line or polygon), a gazetteer entry, an address, or a combination of these."  # noqa: E501
    )
    # Add id to Location.
    schema["definitions"]["Location"]["properties"]["id"] = {
        "title": "Identifier",
        "description": "A local identifier for this location, unique within the array this location appears in.",
        "type": "string",
        "minLength": 1,
    }
    # Add address to Location.
    schema["definitions"]["Location"]["properties"]["address"] = {
        "title": "Address",
        "description": "A physical address where works will take place.",
        "$ref": "#/definitions/Address",
    }
    # Make id the first property.
    schema["definitions"]["Location"]["properties"] = OrderedDict(schema["definitions"]["Location"]["properties"])
    schema["definitions"]["Location"]["properties"].move_to_end("id", last=False)
    schema["definitions"]["Location"]["required"] = ["id"]
    # Set stricter validation on gazetteer identifiers.
    schema["definitions"]["Location"]["properties"]["gazetteer"]["properties"]["identifiers"]["uniqueItems"] = True

    _copy_element("Value")

    _copy_element(
        "Organization",
        {
            # Refer to project instead of contracting process, link to infrastructure codelist instead of PPP codelist.
            ("properties", "roles", "description"): lambda s: (
                s.replace("contracting process", "project").replace(
                    "profiles/ppp/latest/en/", "infrastructure/{{version}}/{{lang}}/"
                )
            ),
        },
    )
    # Remove unneeded fields from Organization.
    del schema["definitions"]["Organization"]["properties"]["shareholders"]
    # Remove OCDS-specific merging properties.
    del schema["definitions"]["Organization"]["properties"]["additionalIdentifiers"]["wholeListMerge"]
    del schema["definitions"]["Organization"]["properties"]["details"]["properties"]["classifications"][
        "wholeListMerge"
    ]
    # Remove details wrapper from Organization.
    schema["definitions"]["Organization"]["properties"]["classifications"] = schema["definitions"]["Organization"][
        "properties"
    ]["details"]["properties"]["classifications"]
    del schema["definitions"]["Organization"]["properties"]["details"]
    # Set stricter validation on party roles.
    schema["definitions"]["Organization"]["properties"]["roles"]["uniqueItems"] = True
    # Add people field to Organization.
    schema["definitions"]["Organization"]["properties"]["people"] = {
        "title": "People",
        "description": "People associated with, representing, or working on behalf of this organization in respect of this project.",  # noqa: E501
        "type": "array",
        "items": {"$ref": "#/definitions/Person"},
        "uniqueItems": True,
    }

    _copy_element("OrganizationReference")

    _copy_element("Address")

    _copy_element(
        "ContactPoint",
        {
            # Refer to project instead of contracting process.
            ("properties", "name", "description"): lambda s: s.replace("contracting process", "project"),
        },
    )

    _copy_element(
        "BudgetBreakdown",
        {
            # Refer to project instead of contracting process.
            ("properties", "amount", "description"): lambda s: s.replace(
                "contracting process",
                "project",
            )
        },
    )
    # Add approval date.
    schema["definitions"]["BudgetBreakdown"]["properties"]["approvalDate"] = deepcopy(
        schema["properties"]["budget"]["properties"]["approvalDate"]
    )
    schema["definitions"]["BudgetBreakdown"]["properties"]["approvalDate"]["description"] = (
        "The date on which this budget entry was approved. Where documentary evidence for this exists, it may be included among the project documents with `.documentType` set to 'budgetApproval'."  # noqa: E501
    )

    _copy_element(
        "Document",
        {
            # Link to infrastructure codelist instead of PPP codelist.
            ("properties", "documentType", "description"): lambda s: (
                s.replace("profiles/ppp/latest/en/", "infrastructure/{{version}}/{{lang}}/")
            ),
        },
    )
    del schema["definitions"]["Document"]["properties"]["accessDetailsURL"]
    del schema["definitions"]["Document"]["properties"]["unofficialTranslations"]
    # Original from standard: "A short description of the document. We recommend descriptions do not exceed 250 words. In the event the document is not accessible online, the description field can be used to describe arrangements for obtaining a copy of the document.",  # noqa: E501
    schema["definitions"]["Document"]["properties"]["description"]["description"] = (
        "Where a link to a full document is provided, the description should provide a 1 - 3 paragraph summary of the information the document contains, and the `pageStart` field should be used to make sure readers can find the correct section of the document containing more information. Where there is no linked document available, the description field may contain all the information required by the current `documentType`. \n\nLine breaks in text (represented in JSON using `\\n\\n`) must be respected by systems displaying this information, and systems may also support basic HTML tags (H1-H6, B, I, U, strong, A and optionally IMG) or [markdown syntax](https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet) for formatting. "  # noqa: E501
    )
    # Original from standard: " direct link to the document or attachment. The server providing access to this document should be configured to correctly report the document mime type."  # noqa: E501
    schema["definitions"]["Document"]["properties"]["url"]["description"] = (
        "This should be a direct link to the document or web page where the information described by the current documentType exists."  # noqa: E501
    )

    _copy_element("Identifier")

    _copy_element(
        "Metric",
        {
            ("properties", "id", "description"): lambda s: (
                s.replace("contracting process", "contracting process or project")
            ),
        },
    )
    # Original from standard: "Metrics are used to set out targets and results from a contracting process. During the planning and tender sections, a metric indicates the anticipated results. In award and contract sections it indicates the awarded/contracted results. In the implementation section it is used to provide updates on actually delivered results, also known as outputs."  # noqa: E501
    schema["definitions"]["Metric"]["description"] = (
        "Metrics are used to set out forecast and actual metrics targets for a project: for example, planned and actual physical and financial progress over time."  # noqa: E501
    )

    _copy_element("Observation")
    # Remove the relatedImplementationMilestone field.
    del schema["definitions"]["Observation"]["properties"]["relatedImplementationMilestone"]

    _copy_element("Transaction")
    # Original from standard: "A spending transaction related to the contracting process. Draws upon the data models of the [Fiscal Data Package](https://frictionlessdata.io/specs/fiscal-data-package/) and the [International Aid Transparency Initiative](http://iatistandard.org/activity-standard/iati-activities/iati-activity/transaction/) and should be used to cross-reference to more detailed information held using a Fiscal Data Package, IATI file, or to provide enough information to allow a user to manually or automatically cross-reference with some other published source of transactional spending data."  # noqa: E501
    schema["definitions"]["Transaction"]["description"] = (
        "A financial transaction related to a project or contracting process. Draws upon the data models of the Fiscal Data Package and the International Aid Transparency Initiative and should be used to cross-reference to more detailed information held using a Fiscal Data Package, IATI file, or to provide enough information to allow a user to manually or automatically cross-reference with some other published source of transactional data."  # noqa: E501
    )

    _copy_element("Milestone")
    # Original from standard: "The milestone block can be used to represent a wide variety of events in the lifetime of a contracting process."  # noqa: E501
    schema["definitions"]["Milestone"]["description"] = "An event in the lifetime of a project or contracting process."
    # Original from standard: "A local identifier for this milestone, unique within this block. This field is used to keep track of multiple revisions of a milestone through the compilation from release to record mechanism."  # noqa: E501
    schema["definitions"]["Milestone"]["properties"]["id"]["description"] = (
        "A local identifier for this milestone, unique within this block."
    )
    # Original from standard: "Milestone codes can be used to track specific events that take place for a particular kind of contracting process. For example, a code of 'approvalLetter' can be used to allow applications to understand this milestone represents the date an approvalLetter is due or signed."  # noqa: E501
    schema["definitions"]["Milestone"]["properties"]["code"]["description"] = (
        "Milestone codes can be used to track specific events that take place for a particular kind of project or contracting process. For example, a code of 'approvalLetter' can be used to allow applications to understand this milestone represents the date an approvalLetter is due or signed."  # noqa: E501
    )
    # Add Milestone.value from OCDS 1.2.
    schema["definitions"]["Milestone"]["properties"]["value"] = {
        "title": "Value",
        "description": "The payment's value, if the milestone represents a planned payment.",
        "$ref": "#/definitions/Value",
    }
    # Remove deprecated Milestone.documents field.
    del schema["definitions"]["Milestone"]["properties"]["documents"]

    _copy_element(
        "MilestoneReference",
        {
            # Remove reference to release, add reference to project.
            ("properties", "id", "description"): lambda s: (
                s.replace(
                    " described elsewhere in a release about this contracting process.",
                    " in this project or contracting process's `.milestones`.",
                )
            ),
        },
    )
    # Original from standard: "The title of the milestone being referenced, this must match the title of a milestone described elsewhere in a release about this contracting process."  # noqa: E501
    schema["definitions"]["MilestoneReference"]["properties"]["title"]["description"] = (
        "The title of the milestone being referenced, this must match the title of a milestone in this project or contracting process's `.milestones`."  # noqa: E501
    )

    _copy_element("Person")
    schema["definitions"]["Person"]["properties"]["jobTitle"] = {
        "title": "Job title",
        "description": "The job title of the person (for example, Financial Manager).",
        "type": "string",
        "minLength": 1,
    }

    _remove_null_and_pattern_properties(schema)
    _remove_integer_identifier_types(schema)
    _remove_deprecated_properties(schema)
    add_validation_properties(schema)

    with (schema_dir / "project-schema.json").open("w") as f:
        json.dump(schema, f, ensure_ascii=False, indent=2)
        f.write("\n")
@cli.command()
@click.argument("filename", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("-a", "--additional-properties", is_flag=True, help="Allow additional properties")
@click.option("-l", "--link-fields", is_flag=True, help="Link field names to jsonschema directives")
def lint(filename, additional_properties, link_fields):
def _get_fields(schema, parents=()):
    """
    Yield each leaf field of a JSON Schema as a tuple of property names.

    A property whose subschema has its own "properties" is descended into instead of being yielded.

    :param schema: A JSON Schema (or subschema) as a dict
    :param parents: The property names leading to ``schema``
    """
    for key, subschema in schema.get("properties", {}).items():
        field = (*parents, key)
        # Leaf: nothing nested below this property.
        if "properties" not in subschema:
            yield field
            continue
        yield from _get_fields(subschema, field)
def _link(name, fields, text):
    """
    Build a link from a backticked field name to its row in a jsonschema directive HTML table.

    If several schema fields end with the named path, print ``text`` with the current occurrence
    highlighted and prompt for the field to link to.

    :param name: The matched text: one leading character followed by the backticked property name
    :param fields: The fields (as tuples) in the JSON schema
    :param text: The text in which the property name appears
    :returns: The linked replacement, or ``name`` unchanged if it is `true` or `false`
    :raises ValueError: if no field ends with the named path
    """
    backticked = name[1:]
    if backticked in {"`true`", "`false`"}:
        return name

    # Drop the leading character and both backticks, then ignore empty parts (e.g. from a leading dot).
    path = tuple(filter(None, name[2:-1].split(".")))
    suffix_length = len(path)
    candidates = [field for field in fields if field[-suffix_length:] == path]

    # Track the match index to highlight the current match. `matches` comes from the enclosing scope.
    matches[name] += 1
    occurrence_number = matches[name]

    if not candidates:
        raise ValueError(f"Field {name} not in schema")

    if len(candidates) == 1:
        index = 0
    else:
        # Resolve ambiguous field names, like `title`.
        click.secho(f"{name[1:]} is ambiguous in:\n", fg="yellow")

        # Split at most `occurrence_number` times, so the last piece is everything after this occurrence.
        pieces = text.split(name, occurrence_number)
        leading = name.join(pieces[:occurrence_number])
        highlighted = click.style(name, fg="red")
        click.echo(f"{leading}{highlighted}{pieces[-1]}\n")

        candidates.sort()
        for number, field in enumerate(candidates, 1):
            click.secho(f" {number}: {'.'.join(field)}", fg="blue")

        total = len(candidates)
        prompt = click.style(f"\nChoose the field to link to (1-{total}):", fg="yellow")
        valid = click.Choice([str(number) for number in range(1, total + 1)])
        index = int(click.prompt(prompt, type=valid, prompt_suffix="", show_choices=False)) - 1

    # A field whose first component starts with an uppercase letter is linked under /definitions/.
    root = candidates[index][0]
    anchor = f"/definitions/{root}" if root[0].isupper() else ""
    url = f"project-schema.json,{anchor},{'/'.join(path)}"

    leading_dot = "." if name[2] == "." else ""
    return f"{name[0]}[`{leading_dot}{'.'.join(path)}`]({url})"
minimal_project = {
"id": "oc4ids-bu3kcz-1",
}
unlinked_backticked_field = re.compile(r"[^\[]`[A-Za-z.]+`")
with filename.open() as f:
elements = yaml.safe_load(f)
with (basedir / "schema" / "project-level" / "project-schema.json").open() as f:
schema = json.load(f)
set_additional_properties(schema, additional_properties)
fields = set(_get_fields(schema))
for name, definition in schema["definitions"].items():
fields.update(_get_fields(definition, (name,)))
validator = Draft4Validator(schema, format_checker=FormatChecker())
additional_fields = defaultdict(list)
missing_data = defaultdict(list)
for element in elements:
identifier = element["id"]
title = element["title"]
# Check for missing data
for key, value in element.items():
# Don't report missing examples for cross-referenced mappings
if (value == "" or value is None) and key != "example" and "See [" not in value:
missing_data[key].append(f"{identifier} {title}")
# Format Markdown.
for key in ("title", "module", "indicator", "disclosure format", "mapping"):
value = element.get(key, "")
if link_fields and key == "mapping":
matches = defaultdict(int)
value = unlinked_backticked_field.sub(
lambda match, value=value: _link(match.group(0), fields, value), value
)
element[key] = mdformat.text(value, options={"number": True}).rstrip()
# Format and validate JSON.
example = element["example"]
if example and example != "N/A":
try:
data = json.loads(example)
project = deepcopy(minimal_project)
json_merge_patch.merge(project, data)
for e in validator.iter_errors(project):
if e.validator == "additionalProperties":
e.absolute_schema_path[-1] = "properties"
e.absolute_schema_path.append("")
for match in re.findall(r"'(\S+)'", e.message):
e.absolute_schema_path[-1] = match
additional_fields[
"/".join(e.absolute_schema_path)
.replace("items/properties/", "")
.replace("properties/", "")
].append([identifier, title])
else:
click.echo(
f"{identifier} ({title}): OC4IDS is invalid: "