From 384b232a81c478fb686efed7a641c0c8b87544e9 Mon Sep 17 00:00:00 2001 From: Matthew Somerville Date: Fri, 1 Mar 2024 13:21:17 +0000 Subject: [PATCH 1/3] Correct capitalisation. --- README.md | 14 +++++++------- app/domain/postcode_lookup_writer.py | 4 ++-- app/scripts/load_postcodes.py | 14 +++++++------- data/2024-01-28/README.md | 6 +++--- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index db7c9e8..8f45687 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Postcode Lookup Generator If you just need a _simple_ lookup from postcode to _most likely_ constituency, you can download a CSV from the folks -at MySociety: +at mySociety: https://pages.mysociety.org/2025-constituencies/datasets/uk_parliament_2025_postcode_lookup/latest @@ -15,9 +15,9 @@ addresses in a single postcode are in the same constituency), then read on... [/data/2024-01-28/output/postcode-lookup.csv](https://github.com/asibs/postcode-lookup-generator/blob/main/data/2024-01-28/output/postcode-lookup.csv) Contains a row for each postcode (postcodes are stripped of any whitespace), and which constituencies we think the -postcode falls within. The constituency columns, `pcon_1`, etc contain the MySociety constituency short code. You can +postcode falls within. The constituency columns, `pcon_1`, etc contain the mySociety constituency short code. You can use this code to map to other constituency codes (eg. GSS code, etc) using the -[MySociety dataset here](https://pages.mysociety.org/2025-constituencies/data/parliament_con_2025/0.1.4/parl_constituencies_2025.csv). +[mySociety dataset here](https://pages.mysociety.org/2025-constituencies/data/parliament_con_2025/0.1.4/parl_constituencies_2025.csv). If a postcode is in more than one constituency, the `pcon_1` column will contain the constituency code we are _most confident_ of / the constituency we believe _most_ addresses in the postcode are in. 
@@ -177,14 +177,14 @@ then performs various geo-spatial queries on _every single address_. We can do various data validation on the installed data: ```sql --- Look for postcodes which are in the UPRN Lookup dataset, but which aren't in the MySociety dataset +-- Look for postcodes which are in the UPRN Lookup dataset, but which aren't in the mySociety dataset SELECT DISTINCT postcode FROM uprn_postcode_to_constituency uprn WHERE NOT EXISTS ( SELECT 1 FROM mysociety_postcode_to_constituency mysoc WHERE mysoc.postcode = uprn.postcode ); --- Look for postcodes which are in the MySociety dataset, but which aren't in the UPRN Lookup dataset +-- Look for postcodes which are in the mySociety dataset, but which aren't in the UPRN Lookup dataset SELECT DISTINCT postcode FROM mysociety_postcode_to_constituency mysoc WHERE NOT EXISTS ( @@ -205,8 +205,8 @@ WHERE NOT EXISTS ( SELECT 1 FROM uprn_postcode_to_constituency uprn WHERE uprn.postcode = onspd.postcode ); --- Look for postcodes which are in the MySociety dataset AND in the UPRN Lookup dataset, where the constituency --- identified by MySociety for that postcode has not been identified by our UPRN methodology +-- Look for postcodes which are in the mySociety dataset AND in the UPRN Lookup dataset, where the constituency +-- identified by mySociety for that postcode has not been identified by our UPRN methodology SELECT * FROM mysociety_postcode_to_constituency mysoc JOIN uprn_postcode_to_constituency uprn diff --git a/app/domain/postcode_lookup_writer.py b/app/domain/postcode_lookup_writer.py index 9af8765..078c3e1 100644 --- a/app/domain/postcode_lookup_writer.py +++ b/app/domain/postcode_lookup_writer.py @@ -74,8 +74,8 @@ def _calculate_confidences(self, parsed_row: dict[str, Any]) -> dict[str, float] onspd_match = next((item for item in parsed_row['onspd_pcons'] if item['pcon'] == pcon), None) mysoc_match = next((item for item in parsed_row['mysociety_pcons'] if item['pcon'] == pcon), None) - # We give 50% of the 
confidence to UPRN, then we give 25% each to ONSPD & MySoc, so a postcode -> constituency - # will only have 100% if ALL properties in the UPRN give the same constituency, and if both ONSPD & MySociety + # We give 50% of the confidence to UPRN, then we give 25% each to ONSPD & mySoc, so a postcode -> constituency + # will only have 100% if ALL properties in the UPRN give the same constituency, and if both ONSPD & mySociety # agree with this. Note, the total confidences for a postcode _may not_ add up to 100% if a single property in # ONSPD overlaps with multiple constituency boundaries (in practice, there are only a couple of records where # this is a problem). diff --git a/app/scripts/load_postcodes.py b/app/scripts/load_postcodes.py index 57adb7d..c0ceff9 100644 --- a/app/scripts/load_postcodes.py +++ b/app/scripts/load_postcodes.py @@ -223,13 +223,13 @@ def create_onspd_postcode_constituency_map(connection) -> None: ) connection.commit() -##### MySociety helper methods ##### +##### mySociety helper methods ##### def load_mysociety_constituencies(connection) -> None: invalid_postcodes = [] with connection.cursor() as cursor: - print(f"{time.ctime()} - Loading MySociety postcode to constituencies mappings") + print(f"{time.ctime()} - Loading mySociety postcode to constituencies mappings") cursor.execute( """ CREATE TABLE mysociety_postcode_to_constituency ( @@ -285,7 +285,7 @@ def create_combo_constituency_map(connection) -> None: SELECT postcode, COALESCE(constituency_code, 'UNKNOWN') AS constituency_code, - 'MySociety' AS source, + 'mySociety' AS source, (CASE WHEN constituency_code IS NULL THEN NULL ELSE 1.0 END) AS confidence, '' AS notes FROM mysociety_postcode_to_constituency @@ -385,7 +385,7 @@ def main() -> None: # set_onspd_postcode_coords(conn) # create_onspd_postcode_constituency_map(conn) - # # MySociety processing + # # mySociety processing # load_mysociety_constituencies(conn) # Combine the data @@ -397,7 +397,7 @@ def main() -> None: main() -# Once 
everything is loaded, you can connect to the postgis DB and check for any mismatch with the MySociety data with: +# Once everything is loaded, you can connect to the postgis DB and check for any mismatch with the mySociety data with: """ SELECT map.postcode, @@ -412,6 +412,6 @@ def main() -> None: AND map.constituency_code <> mysoc.constituency_code ORDER BY 1; """ -# TODO: Generate the final postcode -> constituncies map by combining _all_ constituencies from our map AND any from MySoc. +# TODO: Generate the final postcode -> constituencies map by combining _all_ constituencies from our map AND any from mySoc. # This should give us every postcode, and the list of possible constituencies - including any postcodes which are in multiple constituencies. -# For those constituencies, we can fallback to the DemoClub postcode lookup (once the election is announced - their API only returns data for boundaries with elections...) \ No newline at end of file +# For those constituencies, we can fall back to the DemoClub postcode lookup (once the election is announced - their API only returns data for boundaries with elections...) diff --git a/data/2024-01-28/README.md b/data/2024-01-28/README.md index 5166a9d..c604156 100644 --- a/data/2024-01-28/README.md +++ b/data/2024-01-28/README.md @@ -10,7 +10,7 @@ To get all the data: If you don't have the `wget` command line utility (eg. Windows) you can manually download the files and rename them appropriately. -## MySociety Constituency Data, including boundaries (V0.1.4) +## mySociety Constituency Data, including boundaries (V0.1.4) ### Files @@ -31,7 +31,7 @@ Creative Commons Attribution 4.0 International License https://pages.mysociety.org/2025-constituencies/datasets/parliament_con_2025/0_1_4 -## MySociety Postcode Data (V0.1.2) +## mySociety Postcode Data (V0.1.2) ### Files @@ -103,4 +103,4 @@ work out which new constituency the postcode centroid falls within. 
- Contains GeoPlace data © Local Government Information House Limited copyright and database right 2024 - Source: Office for National Statistics licensed under the Open Government Licence v.3.0 -https://www.ons.gov.uk/methodology/geography/licences \ No newline at end of file +https://www.ons.gov.uk/methodology/geography/licences From 3349ca959e0d326842886bd4ec6de335452579cc Mon Sep 17 00:00:00 2001 From: Matthew Somerville Date: Fri, 1 Mar 2024 13:22:00 +0000 Subject: [PATCH 2/3] Use link with more background information. It includes a link to the same postcode file. --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8f45687..832da5d 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,9 @@ If you just need a _simple_ lookup from postcode to _most likely_ constituency, you can download a CSV from the folks at mySociety: -https://pages.mysociety.org/2025-constituencies/datasets/uk_parliament_2025_postcode_lookup/latest +https://www.mysociety.org/2023/09/12/navigating-the-new-constituencies/ + +Or use https://mapit.mysociety.org/ which includes Northern Ireland postcodes. If you care about the fact that _some_ postcodes may straddle multiple constituencies (ie. _sometimes_ not all addresses in a single postcode are in the same constituency), then read on... From 3be4b8e72524d3049e293bc190c2ce03d7606399 Mon Sep 17 00:00:00 2001 From: Matthew Somerville Date: Fri, 1 Mar 2024 13:27:16 +0000 Subject: [PATCH 3/3] Add point about non-address UPRNs. --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 832da5d..af3c03e 100644 --- a/README.md +++ b/README.md @@ -111,8 +111,10 @@ seeing which constituency it overlaps with. If every UPRN in a single postcode is in the same constituency, we can assume that the whole postcode is within that constituency. 
-If different UPRNs within a single postcode have different constituencies, we know we have a postcode where the exact -address is needed to determine the constituency. +If different UPRNs within a single postcode have different constituencies, we may have a postcode where the exact +address is needed to determine the constituency. However, the open UPRN data includes non-address UPRNs (such as Street +Records) with no classification to distinguish them from addresses, so every address in a postcode can be in one +constituency while the postcode's UPRNs, taken together, still span more than one constituency. At the time of writing, there's no open data source which maps UPRNs to a human-readable address. This means if a user's postcode straddles multiple constituencies, we can now detect it and tell the user (possibly asking them to