From 53b48e24d0bb0a905a92efdf4baff8806274f85d Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Sat, 18 Nov 2023 10:50:46 -0500 Subject: [PATCH 1/7] lua: use non-member functions for interop Currently, Tilemaker uses member functions for interop: ```lua function node_function(node) node:Layer(...) ``` This PR changes Tilemaker to use global functions: ```lua function node_function() Layer(...) ``` The chief rationale is performance. Every member function call needs to push an extra pointer onto the stack when crossing the Lua/C++ boundary. Kaguya serializes this pointer as a Lua userdata. That means every call into Lua has to malloc some memory, and every call back from Lua has to dereference through this pointer. And there are a lot of calls! For OMT on the GB extract, I counted ~1.4B calls from Lua into C++. A secondary rationale is that a global function is a bit more honest. A user might believe that this is currently permissible: ```lua last_node = nil function node_function(node) if last_node ~= nil then -- do something with last_node end -- save the current node for later, for some reason last_node = node ``` But in reality, the OSM objects we pass into Lua don't behave quite like Lua objects. They're backed by OsmLuaProcessing, who will move on, invalidating whatever the user thinks they've got a reference to. 
This PR has a noticeable decrease in reading time for me, measured on the OMT profile for GB, on a 16-core computer: Before: ``` real 1m28.230s user 19m30.281s sys 0m29.610s ``` After: ``` real 1m21.728s user 17m27.150s sys 0m32.668s ``` The tradeoffs: - anyone with a custom Lua profile will need to update it, although the changes are fairly mechanical - Tilemaker now reserves several functions in the global namespace, causing the potential for conflicts --- docs/CONFIGURATION.md | 51 +-- docs/RELATIONS.md | 34 +- resources/process-coastline.lua | 4 +- resources/process-debug.lua | 324 +++++++++---------- resources/process-example.lua | 34 +- resources/process-openmaptiles.lua | 498 ++++++++++++++--------------- src/osm_lua_processing.cpp | 91 ++++-- 7 files changed, 542 insertions(+), 494 deletions(-) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index d41fba9b..d605d153 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -107,13 +107,16 @@ For example: ### Lua processing -Your Lua file needs to supply 5 things: +Your Lua file needs to supply a few things: 1. `node_keys`, a list of those OSM keys which indicate that a node should be processed -2. `init_function(name)` (optional), a function to initialize Lua logic -2. `node_function(node)`, a function to process an OSM node and add it to layers -3. `way_function(way)`, a function to process an OSM way and add it to layers -3. `exit_function` (optional), a function to finalize Lua logic (useful to show statistics) +2. `node_function()`, a function to process an OSM node and add it to layers +3. `way_function()`, a function to process an OSM way and add it to layers +4. (optional) `init_function(name)`, a function to initialize Lua logic +5. (optional) `exit_function`, a function to finalize Lua logic (useful to show statistics) +6. (optional) `relation_scan_function`, a function to determine whether your Lua file wishes to process the given relation +7. 
(optional) `relation_function`, a function to process an OSM relation and add it to layers +8. (optional) `attribute_function`, a function to remap attributes from shapefiles `node_keys` is a simple list (or in Lua parlance, a 'table') of OSM tag keys. If a node has one of those keys, it will be processed by `node_function`; if not, it'll be skipped. For example, if you wanted to show highway crossings and railway stations, it should be `{ "highway", "railway" }`. (This avoids the need to process the vast majority of nodes which contain no important tags at all.) @@ -127,28 +130,30 @@ Note the order: you write to a layer first, then set attributes after. To do that, you use these methods: -* `node:Find(key)` or `way:Find(key)`: get the value for a tag, or the empty string if not present. For example, `way:Find("railway")` might return "rail" for a railway, "siding" for a siding, or "" if it isn't a railway at all. -* `node:Holds(key)` or `way:Holds(key)`: returns true if that key exists, false otherwise. -* `node:Layer("layer_name", false)` or `way:Layer("layer_name", is_area)`: write this node/way to the named layer. This is how you put objects in your vector tile. is_area (true/false) specifies whether a way should be treated as an area, or just as a linestring. -* `way:LayerAsCentroid("layer_name")`: write a single centroid point for this way to the named layer (useful for labels and POIs). -* `node:Attribute(key,value,minzoom)` or `node:Attribute(key,value,minzoom)`: add an attribute to the most recently written layer. Argument `minzoom` is optional, use it if you do not want to write the attribute on lower zoom levels. -* `node:AttributeNumeric(key,value,minzoom)`, `node:AttributeBoolean(key,value,minzoom)` (and `way:`...): for numeric/boolean columns. -* `node:Id()` or `way:Id()`: get the OSM ID of the current object. -* `node:ZOrder(number)` or `way:ZOrder(number)`: Set a numeric value (default 0, 1-byte signed integer) used to sort features within a layer. 
Use this feature to ensure a proper rendering order if the rendering engine itself does not support sorting. Sorting is not supported across layers merged with `write_to`. Features with different z-order are not merged if `combine_below` or `combine_polygons_below` is used. -* `node:MinZoom(zoom)` or `way:MinZoom(zoom)`: set the minimum zoom level (0-15) at which this object will be written. Note that the JSON layer configuration minimum still applies (so `:MinZoom(5)` will have no effect if your layer only starts at z6). -* `way:Length()` and `way:Area()`: return the length (metres)/area (square metres) of the current object. Requires recent Boost. -* `way:Centroid()`: return the lat/lon of the centre of the current object as a two-element Lua table (element 1 is lat, 2 is lon). +* `Find(key)`: get the value for a tag, or the empty string if not present. For example, `Find("railway")` might return "rail" for a railway, "siding" for a siding, or "" if it isn't a railway at all. +* `Holds(key)`: returns true if that key exists, false otherwise. +* `Layer("layer_name", is_area)`: write this node/way to the named layer. This is how you put objects in your vector tile. is_area (true/false) specifies whether a way should be treated as an area, or just as a linestring. +* `LayerAsCentroid("layer_name")`: write a single centroid point for this way to the named layer (useful for labels and POIs). +* `Attribute(key,value,minzoom)`: add an attribute to the most recently written layer. Argument `minzoom` is optional, use it if you do not want to write the attribute on lower zoom levels. +* `AttributeNumeric(key,value,minzoom)`, `AttributeBoolean(key,value,minzoom)`: for numeric/boolean columns. +* `Id()`: get the OSM ID of the current object. +* `ZOrder(number)`: Set a numeric value (default 0, 1-byte signed integer) used to sort features within a layer. Use this feature to ensure a proper rendering order if the rendering engine itself does not support sorting. 
Sorting is not supported across layers merged with `write_to`. Features with different z-order are not merged if `combine_below` or `combine_polygons_below` is used. +* `MinZoom(zoom)`: set the minimum zoom level (0-15) at which this object will be written. Note that the JSON layer configuration minimum still applies (so `MinZoom(5)` will have no effect if your layer only starts at z6). +* `Length()` and `Area()`: return the length (metres)/area (square metres) of the current object. Requires recent Boost. +* `Centroid()`: return the lat/lon of the centre of the current object as a two-element Lua table (element 1 is lat, 2 is lon). The simplest possible function, to include roads/paths and nothing else, might look like this: - function way_function(way) - local highway = way:Find("highway") +```lua + function way_function() + local highway = Find("highway") if highway~="" then - way:Layer("roads", false) - way:Attribute("name", way:Find("name")) - way:Attribute("type", highway) + Layer("roads", false) + Attribute("name", Find("name")) + Attribute("type", highway) end end +``` Take a look at the supplied process.lua for a simple example, or the more complex OpenMapTiles-compatible script in `resources/`. You can specify another filename with the `--process` option. @@ -197,11 +202,11 @@ When processing OSM objects with your Lua script, you can perform simple spatial You can then find out whether a node is within one of these polygons using the `Intersects` method: - if node:Intersects("countries") then print("Looks like it's on land"); end + if Intersects("countries") then print("Looks like it's on land"); end Or you can find out what country(/ies) the node is within using `FindIntersecting`, which returns a table: - names = node:FindIntersecting("countries") + names = FindIntersecting("countries") print(table.concat(name,",")) To enable these functions, set `index` to true in your shapefile layer definition. 
`index_column` is not needed for `Intersects` but required for `FindIntersecting`. diff --git a/docs/RELATIONS.md b/docs/RELATIONS.md index 6e436b68..6fc3b557 100644 --- a/docs/RELATIONS.md +++ b/docs/RELATIONS.md @@ -22,26 +22,30 @@ This is a two-stage process: first, when reading relations, indicate that these To define which relations should be accepted, add a `relation_scan_function`: - function relation_scan_function(relation) - if relation:Find("type")=="route" and relation:Find("route")=="bicycle" then - local network = relation:Find("network") - if network=="ncn" then relation:Accept() end +```lua + function relation_scan_function() + if Find("type")=="route" and Find("route")=="bicycle" then + local network = Find("network") + if network=="ncn" then Accept() end end end +``` -This function takes the relation as its sole argument. Examine the tags using `relation:Find(key)` as normal. (You can also use `relation:Holds(key)` and `relation:Id()`.) If you want to use this relation, call `relation:Accept()`. +Examine the tags using `Find(key)` as normal. (You can also use `Holds(key)` and `Id()`.) If you want to use this relation, call `Accept()`. #### Stage 2: accessing relations from ways -Now that you've accepted the relations, they will be available from `way_function`. They are accessed using an iterator (`way:NextRelation()`) which reads each relation for that way in turn, returning nil when there are no more relations available. Once you have accessed a relation with the iterator, you can read its tags with `way:FindInRelation(key)`. For example: +Now that you've accepted the relations, they will be available from `way_function`. They are accessed using an iterator (`NextRelation()`) which reads each relation for that way in turn, returning nil when there are no more relations available. Once you have accessed a relation with the iterator, you can read its tags with `FindInRelation(key)`. 
For example: +```lua while true do - local rel = way:NextRelation() + local rel = NextRelation() if not rel then break end - print ("Part of route "..way:FindInRelation("ref")) + print ("Part of route "..FindInRelation("ref")) end +``` -(Should you need to re-read the relations, you can reset the iterator with `way:RestartRelations()`.) +(Should you need to re-read the relations, you can reset the iterator with `RestartRelations()`.) ### Writing relation geometries @@ -52,13 +56,15 @@ First, make sure that you have accepted the relations using `relation_scan_funct Then write a `relation_function`, which works in the same way as `way_function` would: - function relation_function(relation) - if relation:Find("type")=="route" and relation:Find("route")=="bicycle" then - relation:Layer("bike_routes", false) - relation:Attribute("class", relation:Find("network")) - relation:Attribute("ref", relation:Find("ref")) +```lua + function relation_function() + if Find("type")=="route" and Find("route")=="bicycle" then + Layer("bike_routes", false) + Attribute("class", Find("network")) + Attribute("ref", Find("ref")) end end +``` ### Not supported diff --git a/resources/process-coastline.lua b/resources/process-coastline.lua index 5e2aca8e..b49eeee5 100644 --- a/resources/process-coastline.lua +++ b/resources/process-coastline.lua @@ -10,10 +10,10 @@ function exit_function() end node_keys = {} -function node_function(node) +function node_function() end -function way_function(way) +function way_function() end -- Remap coastlines diff --git a/resources/process-debug.lua b/resources/process-debug.lua index ea594c19..e1c8e62f 100644 --- a/resources/process-debug.lua +++ b/resources/process-debug.lua @@ -45,36 +45,36 @@ aerodromeValues = Set { "international", "public", "regional", "military", "priv -- Process node tags node_keys = { "amenity", "shop", "sport", "tourism", "place", "office", "natural", "addr:housenumber", "aeroway" } -function node_function(node) +function 
node_function() -- Write 'aerodrome_label' - local aeroway = node:Find("aeroway") + local aeroway = Find("aeroway") if aeroway == "aerodrome" then - node:Layer("aerodrome_label", false) - SetNameAttributes(node) - node:Attribute("iata", node:Find("iata")) - SetEleAttributes(node) - node:Attribute("icao", node:Find("icao")) + Layer("aerodrome_label", false) + SetNameAttributes() + Attribute("iata", Find("iata")) + SetEleAttributes() + Attribute("icao", Find("icao")) - local aerodrome_value = node:Find("aerodrome") + local aerodrome_value = Find("aerodrome") local class if aerodromeValues[aerodrome_value] then class = aerodrome_value else class = "other" end - node:Attribute("class", class) + Attribute("class", class) end -- Write 'housenumber' - local housenumber = node:Find("addr:housenumber") + local housenumber = Find("addr:housenumber") if housenumber~="" then - node:Layer("housenumber", false) - node:Attribute("housenumber", housenumber) + Layer("housenumber", false) + Attribute("housenumber", housenumber) end -- Write 'place' -- note that OpenMapTiles has a rank for countries (1-3), states (1-6) and cities (1-10+); -- we could potentially approximate it for cities based on the population tag - local place = node:Find("place") + local place = Find("place") if place ~= "" then local rank = nil local mz = 13 - local pop = tonumber(node:Find("population")) or 0 + local pop = tonumber(Find("population")) or 0 if place == "continent" then mz=2 elseif place == "country" then mz=3; rank=1 @@ -90,31 +90,31 @@ function node_function(node) elseif place == "locality" then mz=13 end - node:Layer("place", false) - node:Attribute("class", place) - node:MinZoom(mz) - if rank then node:AttributeNumeric("rank", rank) end - SetNameAttributes(node) + Layer("place", false) + Attribute("class", place) + MinZoom(mz) + if rank then AttributeNumeric("rank", rank) end + SetNameAttributes() return end -- Write 'poi' - local rank, class, subclass = GetPOIRank(node) + local rank, class, 
subclass = GetPOIRank() if rank then WritePOI(node,class,subclass,rank) end -- Write 'mountain_peak' and 'water_name' - local natural = node:Find("natural") + local natural = Find("natural") if natural == "peak" or natural == "volcano" then - node:Layer("mountain_peak", false) - SetEleAttributes(node) - node:AttributeNumeric("rank", 1) - node:Attribute("class", natural) - SetNameAttributes(node) + Layer("mountain_peak", false) + SetEleAttributes() + AttributeNumeric("rank", 1) + Attribute("class", natural) + SetNameAttributes() return end if natural == "bay" then - node:Layer("water_name", false) - SetNameAttributes(node) + Layer("water_name", false) + SetNameAttributes() return end end @@ -196,33 +196,33 @@ waterClasses = Set { "river", "riverbank", "stream", "canal", "drain", "ditch waterwayClasses = Set { "stream", "river", "canal", "drain", "ditch" } -function way_function(way) - local highway = way:Find("highway") - local waterway = way:Find("waterway") - local water = way:Find("water") - local building = way:Find("building") - local natural = way:Find("natural") - local historic = way:Find("historic") - local landuse = way:Find("landuse") - local leisure = way:Find("leisure") - local amenity = way:Find("amenity") - local aeroway = way:Find("aeroway") - local railway = way:Find("railway") - local sport = way:Find("sport") - local shop = way:Find("shop") - local tourism = way:Find("tourism") - local man_made = way:Find("man_made") - local isClosed = way:IsClosed() - local housenumber = way:Find("addr:housenumber") +function way_function() + local highway = Find("highway") + local waterway = Find("waterway") + local water = Find("water") + local building = Find("building") + local natural = Find("natural") + local historic = Find("historic") + local landuse = Find("landuse") + local leisure = Find("leisure") + local amenity = Find("amenity") + local aeroway = Find("aeroway") + local railway = Find("railway") + local sport = Find("sport") + local shop = 
Find("shop") + local tourism = Find("tourism") + local man_made = Find("man_made") + local isClosed = IsClosed() + local housenumber = Find("addr:housenumber") local write_name = false - local construction = way:Find("construction") + local construction = Find("construction") -- Miscellaneous preprocessing - if way:Find("disused") == "yes" then return end + if Find("disused") == "yes" then return end if highway == "proposed" then return end if aerowayBuildings[aeroway] then building="yes"; aeroway="" end if landuse == "field" then landuse = "farmland" end - if landuse == "meadow" and way:Find("meadow")=="agricultural" then landuse="farmland" end + if landuse == "meadow" and Find("meadow")=="agricultural" then landuse="farmland" end -- Roads ('transportation' and 'transportation_name', plus 'transportation_name_detail') if highway~="" then @@ -235,33 +235,33 @@ function way_function(way) if trackValues[highway] then h = "track"; layer="transportation_detail" end if pathValues[highway] then h = "path" ; layer="transportation_detail" end if h=="service" then layer="transportation_detail" end - way:Layer(layer, false) - way:Attribute("class", h) - SetBrunnelAttributes(way) + Layer(layer, false) + Attribute("class", h) + SetBrunnelAttributes() -- Construction if highway == "construction" then if constructionValues[construction] then - way:Attribute("class", construction .. "_construction") + Attribute("class", construction .. 
"_construction") else - way:Attribute("class", "minor_construction") + Attribute("class", "minor_construction") end end -- Service - local service = way:Find("service") - if highway == "service" and service ~="" then way:Attribute("service", service) end + local service = Find("service") + if highway == "service" and service ~="" then Attribute("service", service) end -- Links (ramp) if linkValues[highway] then splitHighway = split(highway, "_") highway = splitHighway[1] - way:AttributeNumeric("ramp",1) + AttributeNumeric("ramp",1) end - local oneway = way:Find("oneway") + local oneway = Find("oneway") if oneway == "yes" or oneway == "1" then - way:AttributeNumeric("oneway",1) + AttributeNumeric("oneway",1) end if oneway == "-1" then -- **** TODO @@ -269,115 +269,115 @@ function way_function(way) -- Write names if layer == "motorway" or layer == "trunk" then - way:Layer("transportation_name", false) + Layer("transportation_name", false) elseif h == "minor" or h == "track" or h == "path" or h == "service" then - way:Layer("transportation_name_detail", false) + Layer("transportation_name_detail", false) else - way:Layer("transportation_name_mid", false) + Layer("transportation_name_mid", false) end - SetNameAttributes(way) - way:Attribute("class",h) - way:Attribute("network","road") -- **** needs fixing - if h~=highway then way:Attribute("subclass",highway) end - local ref = way:Find("ref") + SetNameAttributes() + Attribute("class",h) + Attribute("network","road") -- **** needs fixing + if h~=highway then Attribute("subclass",highway) end + local ref = Find("ref") if ref~="" then - way:Attribute("ref",ref) - way:AttributeNumeric("ref_length",ref:len()) + Attribute("ref",ref) + AttributeNumeric("ref_length",ref:len()) end end -- Railways ('transportation' and 'transportation_name', plus 'transportation_name_detail') if railway~="" then - way:Layer("transportation", false) - way:Attribute("class", railway) + Layer("transportation", false) + Attribute("class", railway) 
- way:Layer("transportation_name", false) - SetNameAttributes(way) - way:MinZoom(14) - way:Attribute("class", "rail") + Layer("transportation_name", false) + SetNameAttributes() + MinZoom(14) + Attribute("class", "rail") end -- 'Aeroway' if aeroway~="" then - way:Layer("aeroway", isClosed) - way:Attribute("class",aeroway) - way:Attribute("ref",way:Find("ref")) + Layer("aeroway", isClosed) + Attribute("class",aeroway) + Attribute("ref",Find("ref")) write_name = true end -- 'aerodrome_label' if aeroway=="aerodrome" then - way:LayerAsCentroid("aerodrome_label") - SetNameAttributes(way) - way:Attribute("iata", way:Find("iata")) - SetEleAttributes(way) - way:Attribute("icao", way:Find("icao")) + LayerAsCentroid("aerodrome_label") + SetNameAttributes() + Attribute("iata", Find("iata")) + SetEleAttributes() + Attribute("icao", Find("icao")) - local aerodrome = way:Find(aeroway) + local aerodrome = Find(aeroway) local class if aerodromeValues[aerodrome] then class = aerodrome else class = "other" end - way:Attribute("class", class) + Attribute("class", class) end -- Set 'waterway' and associated if waterwayClasses[waterway] and not isClosed then - if waterway == "river" and way:Holds("name") then - way:Layer("waterway", false) + if waterway == "river" and Holds("name") then + Layer("waterway", false) else - way:Layer("waterway_detail", false) + Layer("waterway_detail", false) end - if way:Find("intermittent")=="yes" then way:AttributeNumeric("intermittent", 1) else way:AttributeNumeric("intermittent", 0) end - way:Attribute("class", waterway) - SetNameAttributes(way) - SetBrunnelAttributes(way) - elseif waterway == "boatyard" then way:Layer("landuse", isClosed); way:Attribute("class", "industrial") - elseif waterway == "dam" then way:Layer("building",isClosed) - elseif waterway == "fuel" then way:Layer("landuse", isClosed); way:Attribute("class", "industrial") + if Find("intermittent")=="yes" then AttributeNumeric("intermittent", 1) else AttributeNumeric("intermittent", 0) 
end + Attribute("class", waterway) + SetNameAttributes() + SetBrunnelAttributes() + elseif waterway == "boatyard" then Layer("landuse", isClosed); Attribute("class", "industrial") + elseif waterway == "dam" then Layer("building",isClosed) + elseif waterway == "fuel" then Layer("landuse", isClosed); Attribute("class", "industrial") end -- Set names on rivers if waterwayClasses[waterway] and not isClosed then - if waterway == "river" and way:Holds("name") then - way:Layer("water_name", false) + if waterway == "river" and Holds("name") then + Layer("water_name", false) else - way:Layer("water_name_detail", false) - way:MinZoom(14) + Layer("water_name_detail", false) + MinZoom(14) end - way:Attribute("class", waterway) - SetNameAttributes(way) + Attribute("class", waterway) + SetNameAttributes() end -- Set 'building' and associated if building~="" then - way:Layer("building", true) - SetMinZoomByArea(way) + Layer("building", true) + SetMinZoomByArea() end -- Set 'housenumber' if housenumber~="" then - way:LayerAsCentroid("housenumber", false) - way:Attribute("housenumber", housenumber) + LayerAsCentroid("housenumber", false) + Attribute("housenumber", housenumber) end -- Set 'water' if natural=="water" or natural=="bay" or leisure=="swimming_pool" or landuse=="reservoir" or landuse=="basin" or waterClasses[waterway] then - if way:Find("covered")=="yes" or not isClosed then return end + if Find("covered")=="yes" or not isClosed then return end local class="lake"; if natural=="bay" then class="ocean" elseif waterway~="" then class="river" end - way:Layer("water",true) --- SetMinZoomByArea(way) - way:Attribute("class",class) + Layer("water",true) +-- SetMinZoomByArea() + Attribute("class",class) - if way:Find("intermittent")=="yes" then way:Attribute("intermittent",1) end + if Find("intermittent")=="yes" then Attribute("intermittent",1) end -- we only want to show the names of actual lakes not every man-made basin that probably doesn't even have a name other than "basin" 
-- examples for which we don't want to show a name: -- https://www.openstreetmap.org/way/25958687 -- https://www.openstreetmap.org/way/27201902 -- https://www.openstreetmap.org/way/25309134 -- https://www.openstreetmap.org/way/24579306 - if way:Holds("name") and natural=="water" and water ~= "basin" and water ~= "wastewater" then - way:LayerAsCentroid("water_name_detail") - SetNameAttributes(way) --- SetMinZoomByArea(way) - way:Attribute("class", class) + if Holds("name") and natural=="water" and water ~= "basin" and water ~= "wastewater" then + LayerAsCentroid("water_name_detail") + SetNameAttributes() +-- SetMinZoomByArea() + Attribute("class", class) end return -- in case we get any landuse processing @@ -388,11 +388,11 @@ function way_function(way) if l=="" then l=natural end if l=="" then l=leisure end if landcoverKeys[l] then - way:Layer("landcover", true) - SetMinZoomByArea(way) - way:Attribute("class", landcoverKeys[l]) - if l=="wetland" then way:Attribute("subclass", way:Find("wetland")) - else way:Attribute("subclass", l) end + Layer("landcover", true) + SetMinZoomByArea() + Attribute("class", landcoverKeys[l]) + if l=="wetland" then Attribute("subclass", Find("wetland")) + else Attribute("subclass", l) end write_name = true -- Set 'landuse' @@ -400,26 +400,26 @@ function way_function(way) if l=="" then l=amenity end if l=="" then l=tourism end if landuseKeys[l] then - way:Layer("landuse", true) - way:Attribute("class", l) + Layer("landuse", true) + Attribute("class", l) write_name = true end end -- Parks - if boundary=="national_park" then way:Layer("park",true); way:Attribute("class",boundary); SetNameAttributes(way) - elseif leisure=="nature_reserve" then way:Layer("park",true); way:Attribute("class",leisure ); SetNameAttributes(way) end + if boundary=="national_park" then Layer("park",true); Attribute("class",boundary); SetNameAttributes() + elseif leisure=="nature_reserve" then Layer("park",true); Attribute("class",leisure ); SetNameAttributes() end 
-- POIs ('poi' and 'poi_detail') - local rank, class, subclass = GetPOIRank(way) + local rank, class, subclass = GetPOIRank() if rank then WritePOI(way,class,subclass,rank); return end -- Catch-all - if (building~="" or write_name) and way:Holds("name") then - way:LayerAsCentroid("poi_detail") - SetNameAttributes(way) + if (building~="" or write_name) and Holds("name") then + LayerAsCentroid("poi_detail") + SetNameAttributes() if write_name then rank=6 else rank=25 end - way:AttributeNumeric("rank", rank) + AttributeNumeric("rank", rank) end end @@ -435,65 +435,67 @@ end function WritePOI(obj,class,subclass,rank) local layer = "poi" if rank>4 then layer="poi_detail" end - obj:LayerAsCentroid(layer) + LayerAsCentroid(layer) SetNameAttributes(obj) - obj:AttributeNumeric("rank", rank) - obj:Attribute("class", class) - obj:Attribute("subclass", subclass) + AttributeNumeric("rank", rank) + Attribute("class", class) + Attribute("subclass", subclass) end -- Set name attributes on any object function SetNameAttributes(obj) - local name = obj:Find("name"), main_written = name, iname + local name = Find("name") + local main_written = name + local iname -- if we have a preferred language, then write that (if available), and additionally write the base name tag - if preferred_language and obj:Holds("name:"..preferred_language) then - iname = obj:Find("name:"..preferred_language) + if preferred_language and Holds("name:"..preferred_language) then + iname = Find("name:"..preferred_language) print("Found "..preferred_language..": "..iname) - obj:Attribute(preferred_language_attribute, iname) + Attribute(preferred_language_attribute, iname) if iname~=name and default_language_attribute then - obj:Attribute(default_language_attribute, name) + Attribute(default_language_attribute, name) else main_written = iname end else - obj:Attribute(preferred_language_attribute, name) + Attribute(preferred_language_attribute, name) end -- then set any additional languages for i,lang in 
ipairs(additional_languages) do - iname = obj:Find("name:"..lang) + iname = Find("name:"..lang) if iname=="" then iname=name end - if iname~=main_written then obj:Attribute("name:"..lang, iname) end + if iname~=main_written then Attribute("name:"..lang, iname) end end end -- Set ele and ele_ft on any object function SetEleAttributes(obj) - local ele = obj:Find("ele") + local ele = Find("ele") if ele ~= "" then local meter = math.floor(tonumber(ele) or 0) local feet = math.floor(meter * 3.2808399) - obj:AttributeNumeric("ele", meter) - obj:AttributeNumeric("ele_ft", feet) + AttributeNumeric("ele", meter) + AttributeNumeric("ele_ft", feet) end end function SetBrunnelAttributes(obj) - if obj:Find("bridge") == "yes" then obj:Attribute("brunnel", "bridge") - elseif obj:Find("tunnel") == "yes" then obj:Attribute("brunnel", "tunnel") - elseif obj:Find("ford") == "yes" then obj:Attribute("brunnel", "ford") + if Find("bridge") == "yes" then Attribute("brunnel", "bridge") + elseif Find("tunnel") == "yes" then Attribute("brunnel", "tunnel") + elseif Find("ford") == "yes" then Attribute("brunnel", "ford") end end -- Set minimum zoom level by area -function SetMinZoomByArea(way) - local area=way:Area() - if area>ZRES5^2 then way:MinZoom(6) - elseif area>ZRES6^2 then way:MinZoom(7) - elseif area>ZRES7^2 then way:MinZoom(8) - elseif area>ZRES8^2 then way:MinZoom(9) - elseif area>ZRES9^2 then way:MinZoom(10) - elseif area>ZRES10^2 then way:MinZoom(11) - elseif area>ZRES11^2 then way:MinZoom(12) - elseif area>ZRES12^2 then way:MinZoom(13) - else way:MinZoom(14) end +function SetMinZoomByArea() + local area=Area() + if area>ZRES5^2 then MinZoom(6) + elseif area>ZRES6^2 then MinZoom(7) + elseif area>ZRES7^2 then MinZoom(8) + elseif area>ZRES8^2 then MinZoom(9) + elseif area>ZRES9^2 then MinZoom(10) + elseif area>ZRES10^2 then MinZoom(11) + elseif area>ZRES11^2 then MinZoom(12) + elseif area>ZRES12^2 then MinZoom(13) + else MinZoom(14) end end -- Calculate POIs (typically rank 1-4 go 
to 'poi' z12-14, rank 5+ to 'poi_detail' z14) @@ -503,8 +505,8 @@ function GetPOIRank(obj) -- Can we find the tag? for k,list in pairs(poiTags) do - if list[obj:Find(k)] then - v = obj:Find(k) -- k/v are the OSM tag pair + if list[Find(k)] then + v = Find(k) -- k/v are the OSM tag pair class = poiClasses[v] or v rank = poiClassRanks[class] or 25 return rank, class, v @@ -512,7 +514,7 @@ function GetPOIRank(obj) end -- Catch-all for shops - local shop = obj:Find("shop") + local shop = Find("shop") if shop~="" then return poiClassRanks['shop'], "shop", shop end -- Nothing found diff --git a/resources/process-example.lua b/resources/process-example.lua index 41b461df..b4b1f108 100644 --- a/resources/process-example.lua +++ b/resources/process-example.lua @@ -14,33 +14,33 @@ end -- Assign nodes to a layer, and set attributes, based on OSM tags function node_function(node) - local amenity = node:Find("amenity") - local shop = node:Find("shop") + local amenity = Find("amenity") + local shop = Find("shop") if amenity~="" or shop~="" then - node:Layer("poi", false) - if amenity~="" then node:Attribute("class",amenity) - else node:Attribute("class",shop) end - node:Attribute("name", node:Find("name")) + Layer("poi", false) + if amenity~="" then Attribute("class",amenity) + else Attribute("class",shop) end + Attribute("name", Find("name")) end end -- Similarly for ways -function way_function(way) - local highway = way:Find("highway") - local waterway = way:Find("waterway") - local building = way:Find("building") +function way_function() + local highway = Find("highway") + local waterway = Find("waterway") + local building = Find("building") if highway~="" then - way:Layer("transportation", false) - way:Attribute("class", highway) --- way:Attribute("id",way:Id()) --- way:AttributeNumeric("area",37) + Layer("transportation", false) + Attribute("class", highway) +-- Attribute("id",Id()) +-- AttributeNumeric("area",37) end if waterway~="" then - way:Layer("waterway", false) - 
way:Attribute("class", waterway) + Layer("waterway", false) + Attribute("class", waterway) end if building~="" then - way:Layer("building", true) + Layer("building", true) end end diff --git a/resources/process-openmaptiles.lua b/resources/process-openmaptiles.lua index c7f74745..31e487fd 100644 --- a/resources/process-openmaptiles.lua +++ b/resources/process-openmaptiles.lua @@ -118,36 +118,36 @@ function calcRank(place, population, capital_al) end -function node_function(node) +function node_function() -- Write 'aerodrome_label' - local aeroway = node:Find("aeroway") + local aeroway = Find("aeroway") if aeroway == "aerodrome" then - node:Layer("aerodrome_label", false) - SetNameAttributes(node) - node:Attribute("iata", node:Find("iata")) - SetEleAttributes(node) - node:Attribute("icao", node:Find("icao")) + Layer("aerodrome_label", false) + SetNameAttributes() + Attribute("iata", Find("iata")) + SetEleAttributes() + Attribute("icao", Find("icao")) - local aerodrome_value = node:Find("aerodrome") + local aerodrome_value = Find("aerodrome") local class if aerodromeValues[aerodrome_value] then class = aerodrome_value else class = "other" end - node:Attribute("class", class) + Attribute("class", class) end -- Write 'housenumber' - local housenumber = node:Find("addr:housenumber") + local housenumber = Find("addr:housenumber") if housenumber~="" then - node:Layer("housenumber", false) - node:Attribute("housenumber", housenumber) + Layer("housenumber", false) + Attribute("housenumber", housenumber) end -- Write 'place' -- note that OpenMapTiles has a rank for countries (1-3), states (1-6) and cities (1-10+); -- we could potentially approximate it for cities based on the population tag - local place = node:Find("place") + local place = Find("place") if place ~= "" then local mz = 13 - local pop = tonumber(node:Find("population")) or 0 - local capital = capitalLevel(node:Find("capital")) + local pop = tonumber(Find("population")) or 0 + local capital = 
capitalLevel(Find("capital")) local rank = calcRank(place, pop, capital) if place == "continent" then mz=0 @@ -167,33 +167,33 @@ function node_function(node) elseif place == "locality" then mz=13 end - node:Layer("place", false) - node:Attribute("class", place) - node:MinZoom(mz) - if rank then node:AttributeNumeric("rank", rank) end - if capital then node:AttributeNumeric("capital", capital) end - if place=="country" then node:Attribute("iso_a2", node:Find("ISO3166-1:alpha2")) end - SetNameAttributes(node) + Layer("place", false) + Attribute("class", place) + MinZoom(mz) + if rank then AttributeNumeric("rank", rank) end + if capital then AttributeNumeric("capital", capital) end + if place=="country" then Attribute("iso_a2", Find("ISO3166-1:alpha2")) end + SetNameAttributes() return end -- Write 'poi' - local rank, class, subclass = GetPOIRank(node) - if rank then WritePOI(node,class,subclass,rank) end + local rank, class, subclass = GetPOIRank() + if rank then WritePOI(class,subclass,rank) end -- Write 'mountain_peak' and 'water_name' - local natural = node:Find("natural") + local natural = Find("natural") if natural == "peak" or natural == "volcano" then - node:Layer("mountain_peak", false) - SetEleAttributes(node) - node:AttributeNumeric("rank", 1) - node:Attribute("class", natural) - SetNameAttributes(node) + Layer("mountain_peak", false) + SetEleAttributes() + AttributeNumeric("rank", 1) + Attribute("class", natural) + SetNameAttributes() return end if natural == "bay" then - node:Layer("water_name", false) - SetNameAttributes(node) + Layer("water_name", false) + SetNameAttributes() return end end @@ -279,81 +279,81 @@ waterwayClasses = Set { "stream", "river", "canal", "drain", "ditch" } -- Scan relations for use in ways -function relation_scan_function(relation) - if relation:Find("type")=="boundary" and relation:Find("boundary")=="administrative" then - relation:Accept() +function relation_scan_function() + if Find("type")=="boundary" and 
Find("boundary")=="administrative" then + Accept() end end -function write_to_transportation_layer(way, minzoom, highway_class) - way:Layer("transportation", false) - way:MinZoom(minzoom) - SetZOrder(way) - way:Attribute("class", highway_class) - SetBrunnelAttributes(way) - if ramp then way:AttributeNumeric("ramp",1) end +function write_to_transportation_layer(minzoom, highway_class) + Layer("transportation", false) + MinZoom(minzoom) + SetZOrder() + Attribute("class", highway_class) + SetBrunnelAttributes() + if ramp then AttributeNumeric("ramp",1) end -- Service - if highway == "service" and service ~="" then way:Attribute("service", service) end + if highway == "service" and service ~="" then Attribute("service", service) end - local oneway = way:Find("oneway") + local oneway = Find("oneway") if oneway == "yes" or oneway == "1" then - way:AttributeNumeric("oneway",1) + AttributeNumeric("oneway",1) end if oneway == "-1" then -- **** TODO end - local surface = way:Find("surface") - local surfaceMinzoom = 12 + local surface = Find("surface") + local surfaceMinzoom = 12 if pavedValues[surface] then - way:Attribute("surface", "paved", surfaceMinzoom) + Attribute("surface", "paved", surfaceMinzoom) elseif unpavedValues[surface] then - way:Attribute("surface", "unpaved", surfaceMinzoom) - end - local accessMinzoom = 9 - if way:Holds("access") then way:Attribute("access", way:Find("access"), accessMinzoom) end - if way:Holds("bicycle") then way:Attribute("bicycle", way:Find("bicycle"), accessMinzoom) end - if way:Holds("foot") then way:Attribute("foot", way:Find("foot"), accessMinzoom) end - if way:Holds("horse") then way:Attribute("horse", way:Find("horse"), accessMinzoom) end - way:AttributeBoolean("toll", way:Find("toll") == "yes", accessMinzoom) - way:AttributeNumeric("layer", tonumber(way:Find("layer")) or 0, accessMinzoom) - way:AttributeBoolean("expressway", way:Find("expressway"), 7) - way:Attribute("mtb_scale", way:Find("mtb:scale"), 10) + Attribute("surface", 
"unpaved", surfaceMinzoom) + end + local accessMinzoom = 9 + if Holds("access") then Attribute("access", Find("access"), accessMinzoom) end + if Holds("bicycle") then Attribute("bicycle", Find("bicycle"), accessMinzoom) end + if Holds("foot") then Attribute("foot", Find("foot"), accessMinzoom) end + if Holds("horse") then Attribute("horse", Find("horse"), accessMinzoom) end + AttributeBoolean("toll", Find("toll") == "yes", accessMinzoom) + AttributeNumeric("layer", tonumber(Find("layer")) or 0, accessMinzoom) + AttributeBoolean("expressway", Find("expressway"), 7) + Attribute("mtb_scale", Find("mtb:scale"), 10) end -- Process way tags -function way_function(way) - local route = way:Find("route") - local highway = way:Find("highway") - local waterway = way:Find("waterway") - local water = way:Find("water") - local building = way:Find("building") - local natural = way:Find("natural") - local historic = way:Find("historic") - local landuse = way:Find("landuse") - local leisure = way:Find("leisure") - local amenity = way:Find("amenity") - local aeroway = way:Find("aeroway") - local railway = way:Find("railway") - local service = way:Find("service") - local sport = way:Find("sport") - local shop = way:Find("shop") - local tourism = way:Find("tourism") - local man_made = way:Find("man_made") - local boundary = way:Find("boundary") - local isClosed = way:IsClosed() - local housenumber = way:Find("addr:housenumber") +function way_function() + local route = Find("route") + local highway = Find("highway") + local waterway = Find("waterway") + local water = Find("water") + local building = Find("building") + local natural = Find("natural") + local historic = Find("historic") + local landuse = Find("landuse") + local leisure = Find("leisure") + local amenity = Find("amenity") + local aeroway = Find("aeroway") + local railway = Find("railway") + local service = Find("service") + local sport = Find("sport") + local shop = Find("shop") + local tourism = Find("tourism") + local 
man_made = Find("man_made") + local boundary = Find("boundary") + local isClosed = IsClosed() + local housenumber = Find("addr:housenumber") local write_name = false - local construction = way:Find("construction") + local construction = Find("construction") -- Miscellaneous preprocessing - if way:Find("disused") == "yes" then return end - if boundary~="" and way:Find("protection_title")=="National Forest" and way:Find("operator")=="United States Forest Service" then return end + if Find("disused") == "yes" then return end + if boundary~="" and Find("protection_title")=="National Forest" and Find("operator")=="United States Forest Service" then return end if highway == "proposed" then return end if aerowayBuildings[aeroway] then building="yes"; aeroway="" end if landuse == "field" then landuse = "farmland" end - if landuse == "meadow" and way:Find("meadow")=="agricultural" then landuse="farmland" end + if landuse == "meadow" and Find("meadow")=="agricultural" then landuse="farmland" end -- Boundaries within relations -- note that we process administrative boundaries as properties on ways, rather than as single relation geometries, @@ -361,21 +361,21 @@ function way_function(way) local admin_level = 11 local isBoundary = false while true do - local rel = way:NextRelation() + local rel = NextRelation() if not rel then break end isBoundary = true - admin_level = math.min(admin_level, tonumber(way:FindInRelation("admin_level")) or 11) + admin_level = math.min(admin_level, tonumber(FindInRelation("admin_level")) or 11) end -- Boundaries in ways if boundary=="administrative" then - admin_level = math.min(admin_level, tonumber(way:Find("admin_level")) or 11) + admin_level = math.min(admin_level, tonumber(Find("admin_level")) or 11) isBoundary = true end -- Administrative boundaries -- https://openmaptiles.org/schema/#boundary - if isBoundary and not (way:Find("maritime")=="yes") then + if isBoundary and not (Find("maritime")=="yes") then local mz = 0 if admin_level>=3 and 
admin_level<5 then mz=4 elseif admin_level>=5 and admin_level<7 then mz=8 @@ -383,22 +383,22 @@ function way_function(way) elseif admin_level>=8 then mz=12 end - way:Layer("boundary",false) - way:AttributeNumeric("admin_level", admin_level) - way:MinZoom(mz) + Layer("boundary",false) + AttributeNumeric("admin_level", admin_level) + MinZoom(mz) -- disputed status (0 or 1). some styles need to have the 0 to show it. - local disputed = way:Find("disputed") + local disputed = Find("disputed") if disputed=="yes" then - way:AttributeNumeric("disputed", 1) + AttributeNumeric("disputed", 1) else - way:AttributeNumeric("disputed", 0) + AttributeNumeric("disputed", 0) end end -- Roads ('transportation' and 'transportation_name', plus 'transportation_name_detail') if highway~="" then - local access = way:Find("access") - local surface = way:Find("surface") + local access = Find("access") + local surface = Find("surface") local h = highway local minzoom = 99 @@ -439,158 +439,158 @@ function way_function(way) -- Write to layer if minzoom <= 14 then - write_to_transportation_layer(way, minzoom, h) + write_to_transportation_layer(minzoom, h) -- Write names if minzoom < 8 then minzoom = 8 end if highway == "motorway" or highway == "trunk" then - way:Layer("transportation_name", false) - way:MinZoom(minzoom) + Layer("transportation_name", false) + MinZoom(minzoom) elseif h == "minor" or h == "track" or h == "path" or h == "service" then - way:Layer("transportation_name_detail", false) - way:MinZoom(minzoom) + Layer("transportation_name_detail", false) + MinZoom(minzoom) else - way:Layer("transportation_name_mid", false) - way:MinZoom(minzoom) + Layer("transportation_name_mid", false) + MinZoom(minzoom) end - SetNameAttributes(way) - way:Attribute("class",h) - way:Attribute("network","road") -- **** could also be us-interstate, us-highway, us-state - if h~=highway then way:Attribute("subclass",highway) end - local ref = way:Find("ref") + SetNameAttributes() + Attribute("class",h) + 
Attribute("network","road") -- **** could also be us-interstate, us-highway, us-state + if h~=highway then Attribute("subclass",highway) end + local ref = Find("ref") if ref~="" then - way:Attribute("ref",ref) - way:AttributeNumeric("ref_length",ref:len()) + Attribute("ref",ref) + AttributeNumeric("ref_length",ref:len()) end end end -- Railways ('transportation' and 'transportation_name', plus 'transportation_name_detail') if railway~="" then - way:Layer("transportation", false) - way:Attribute("class", railway) - SetZOrder(way) - SetBrunnelAttributes(way) + Layer("transportation", false) + Attribute("class", railway) + SetZOrder() + SetBrunnelAttributes() if service~="" then - way:Attribute("service", service) - way:MinZoom(12) + Attribute("service", service) + MinZoom(12) else - way:MinZoom(9) + MinZoom(9) end - way:Layer("transportation_name", false) - SetNameAttributes(way) - way:MinZoom(14) - way:Attribute("class", "rail") + Layer("transportation_name", false) + SetNameAttributes() + MinZoom(14) + Attribute("class", "rail") end -- Pier if man_made=="pier" then - way:Layer("transportation", isClosed) - SetZOrder(way) - way:Attribute("class", "pier") - SetMinZoomByArea(way) + Layer("transportation", isClosed) + SetZOrder() + Attribute("class", "pier") + SetMinZoomByArea() end -- 'Ferry' if route=="ferry" then - way:Layer("transportation", false) - way:Attribute("class", "ferry") - SetZOrder(way) - way:MinZoom(9) - SetBrunnelAttributes(way) + Layer("transportation", false) + Attribute("class", "ferry") + SetZOrder() + MinZoom(9) + SetBrunnelAttributes() - way:Layer("transportation_name", false) - SetNameAttributes(way) - way:MinZoom(12) - way:Attribute("class", "ferry") + Layer("transportation_name", false) + SetNameAttributes() + MinZoom(12) + Attribute("class", "ferry") end -- 'Aeroway' if aeroway~="" then - way:Layer("aeroway", isClosed) - way:Attribute("class",aeroway) - way:Attribute("ref",way:Find("ref")) + Layer("aeroway", isClosed) + 
Attribute("class",aeroway) + Attribute("ref",Find("ref")) write_name = true end -- 'aerodrome_label' if aeroway=="aerodrome" then - way:LayerAsCentroid("aerodrome_label") - SetNameAttributes(way) - way:Attribute("iata", way:Find("iata")) - SetEleAttributes(way) - way:Attribute("icao", way:Find("icao")) + LayerAsCentroid("aerodrome_label") + SetNameAttributes() + Attribute("iata", Find("iata")) + SetEleAttributes() + Attribute("icao", Find("icao")) - local aerodrome = way:Find(aeroway) + local aerodrome = Find(aeroway) local class if aerodromeValues[aerodrome] then class = aerodrome else class = "other" end - way:Attribute("class", class) + Attribute("class", class) end -- Set 'waterway' and associated if waterwayClasses[waterway] and not isClosed then - if waterway == "river" and way:Holds("name") then - way:Layer("waterway", false) + if waterway == "river" and Holds("name") then + Layer("waterway", false) else - way:Layer("waterway_detail", false) + Layer("waterway_detail", false) end - if way:Find("intermittent")=="yes" then way:AttributeNumeric("intermittent", 1) else way:AttributeNumeric("intermittent", 0) end - way:Attribute("class", waterway) - SetNameAttributes(way) - SetBrunnelAttributes(way) - elseif waterway == "boatyard" then way:Layer("landuse", isClosed); way:Attribute("class", "industrial"); way:MinZoom(12) - elseif waterway == "dam" then way:Layer("building",isClosed) - elseif waterway == "fuel" then way:Layer("landuse", isClosed); way:Attribute("class", "industrial"); way:MinZoom(14) + if Find("intermittent")=="yes" then AttributeNumeric("intermittent", 1) else AttributeNumeric("intermittent", 0) end + Attribute("class", waterway) + SetNameAttributes() + SetBrunnelAttributes() + elseif waterway == "boatyard" then Layer("landuse", isClosed); Attribute("class", "industrial"); MinZoom(12) + elseif waterway == "dam" then Layer("building",isClosed) + elseif waterway == "fuel" then Layer("landuse", isClosed); Attribute("class", "industrial"); MinZoom(14) 
end -- Set names on rivers if waterwayClasses[waterway] and not isClosed then - if waterway == "river" and way:Holds("name") then - way:Layer("water_name", false) + if waterway == "river" and Holds("name") then + Layer("water_name", false) else - way:Layer("water_name_detail", false) - way:MinZoom(14) + Layer("water_name_detail", false) + MinZoom(14) end - way:Attribute("class", waterway) - SetNameAttributes(way) + Attribute("class", waterway) + SetNameAttributes() end -- Set 'building' and associated if building~="" then - way:Layer("building", true) - SetBuildingHeightAttributes(way) - SetMinZoomByArea(way) + Layer("building", true) + SetBuildingHeightAttributes() + SetMinZoomByArea() end -- Set 'housenumber' if housenumber~="" then - way:LayerAsCentroid("housenumber", false) - way:Attribute("housenumber", housenumber) + LayerAsCentroid("housenumber", false) + Attribute("housenumber", housenumber) end -- Set 'water' if natural=="water" or leisure=="swimming_pool" or landuse=="reservoir" or landuse=="basin" or waterClasses[waterway] then - if way:Find("covered")=="yes" or not isClosed then return end + if Find("covered")=="yes" or not isClosed then return end local class="lake"; if waterway~="" then class="river" end - if class=="lake" and way:Find("wikidata")=="Q192770" then return end - way:Layer("water",true) + if class=="lake" and Find("wikidata")=="Q192770" then return end + Layer("water",true) SetMinZoomByArea(way) - way:Attribute("class",class) + Attribute("class",class) - if way:Find("intermittent")=="yes" then way:Attribute("intermittent",1) end + if Find("intermittent")=="yes" then Attribute("intermittent",1) end -- we only want to show the names of actual lakes not every man-made basin that probably doesn't even have a name other than "basin" -- examples for which we don't want to show a name: -- https://www.openstreetmap.org/way/25958687 -- https://www.openstreetmap.org/way/27201902 -- https://www.openstreetmap.org/way/25309134 -- 
https://www.openstreetmap.org/way/24579306 - if way:Holds("name") and natural=="water" and water ~= "basin" and water ~= "wastewater" then - way:LayerAsCentroid("water_name_detail") - SetNameAttributes(way) - SetMinZoomByArea(way) - way:Attribute("class", class) + if Holds("name") and natural=="water" and water ~= "basin" and water ~= "wastewater" then + LayerAsCentroid("water_name_detail") + SetNameAttributes() + SetMinZoomByArea() + Attribute("class", class) end return -- in case we get any landuse processing @@ -601,11 +601,11 @@ function way_function(way) if l=="" then l=natural end if l=="" then l=leisure end if landcoverKeys[l] then - way:Layer("landcover", true) - SetMinZoomByArea(way) - way:Attribute("class", landcoverKeys[l]) - if l=="wetland" then way:Attribute("subclass", way:Find("wetland")) - else way:Attribute("subclass", l) end + Layer("landcover", true) + SetMinZoomByArea() + Attribute("class", landcoverKeys[l]) + if l=="wetland" then Attribute("subclass", Find("wetland")) + else Attribute("subclass", l) end write_name = true -- Set 'landuse' @@ -613,31 +613,31 @@ function way_function(way) if l=="" then l=amenity end if l=="" then l=tourism end if landuseKeys[l] then - way:Layer("landuse", true) - way:Attribute("class", l) + Layer("landuse", true) + Attribute("class", l) if l=="residential" then - if way:Area()4 then layer="poi_detail" end - obj:LayerAsCentroid(layer) - SetNameAttributes(obj) - obj:AttributeNumeric("rank", rank) - obj:Attribute("class", class) - obj:Attribute("subclass", subclass) + LayerAsCentroid(layer) + SetNameAttributes() + AttributeNumeric("rank", rank) + Attribute("class", class) + Attribute("subclass", subclass) -- layer defaults to 0 - obj:AttributeNumeric("layer", tonumber(obj:Find("layer")) or 0) + AttributeNumeric("layer", tonumber(Find("layer")) or 0) -- indoor defaults to false - obj:AttributeBoolean("indoor", (obj:Find("indoor") == "yes")) + AttributeBoolean("indoor", (Find("indoor") == "yes")) -- level has no 
default - local level = tonumber(obj:Find("level")) + local level = tonumber(Find("level")) if level then - obj:AttributeNumeric("level", level) + AttributeNumeric("level", level) end end -- Set name attributes on any object -function SetNameAttributes(obj) - local name = obj:Find("name"), iname +function SetNameAttributes() + local name = Find("name"), iname local main_written = name -- if we have a preferred language, then write that (if available), and additionally write the base name tag - if preferred_language and obj:Holds("name:"..preferred_language) then - iname = obj:Find("name:"..preferred_language) - obj:Attribute(preferred_language_attribute, iname) + if preferred_language and Holds("name:"..preferred_language) then + iname = Find("name:"..preferred_language) + Attribute(preferred_language_attribute, iname) if iname~=name and default_language_attribute then - obj:Attribute(default_language_attribute, name) + Attribute(default_language_attribute, name) else main_written = iname end else - obj:Attribute(preferred_language_attribute, name) + Attribute(preferred_language_attribute, name) end -- then set any additional languages for i,lang in ipairs(additional_languages) do - iname = obj:Find("name:"..lang) + iname = Find("name:"..lang) if iname=="" then iname=name end - if iname~=main_written then obj:Attribute("name:"..lang, iname) end + if iname~=main_written then Attribute("name:"..lang, iname) end end end -- Set ele and ele_ft on any object -function SetEleAttributes(obj) - local ele = obj:Find("ele") +function SetEleAttributes() + local ele = Find("ele") if ele ~= "" then local meter = math.floor(tonumber(ele) or 0) local feet = math.floor(meter * 3.2808399) - obj:AttributeNumeric("ele", meter) - obj:AttributeNumeric("ele_ft", feet) + AttributeNumeric("ele", meter) + AttributeNumeric("ele_ft", feet) end end -function SetBrunnelAttributes(obj) - if obj:Find("bridge") == "yes" then obj:Attribute("brunnel", "bridge") - elseif obj:Find("tunnel") == "yes" 
then obj:Attribute("brunnel", "tunnel") - elseif obj:Find("ford") == "yes" then obj:Attribute("brunnel", "ford") +function SetBrunnelAttributes() + if Find("bridge") == "yes" then Attribute("brunnel", "bridge") + elseif Find("tunnel") == "yes" then Attribute("brunnel", "tunnel") + elseif Find("ford") == "yes" then Attribute("brunnel", "ford") end end -- Set minimum zoom level by area -function SetMinZoomByArea(way) - local area=way:Area() - if area>ZRES5^2 then way:MinZoom(6) - elseif area>ZRES6^2 then way:MinZoom(7) - elseif area>ZRES7^2 then way:MinZoom(8) - elseif area>ZRES8^2 then way:MinZoom(9) - elseif area>ZRES9^2 then way:MinZoom(10) - elseif area>ZRES10^2 then way:MinZoom(11) - elseif area>ZRES11^2 then way:MinZoom(12) - elseif area>ZRES12^2 then way:MinZoom(13) - else way:MinZoom(14) end +function SetMinZoomByArea() + local area=Area() + if area>ZRES5^2 then MinZoom(6) + elseif area>ZRES6^2 then MinZoom(7) + elseif area>ZRES7^2 then MinZoom(8) + elseif area>ZRES8^2 then MinZoom(9) + elseif area>ZRES9^2 then MinZoom(10) + elseif area>ZRES10^2 then MinZoom(11) + elseif area>ZRES11^2 then MinZoom(12) + elseif area>ZRES12^2 then MinZoom(13) + else MinZoom(14) end end -- Calculate POIs (typically rank 1-4 go to 'poi' z12-14, rank 5+ to 'poi_detail' z14) -- returns rank, class, subclass -function GetPOIRank(obj) +function GetPOIRank() local k,list,v,class,rank -- Can we find the tag? 
for k,list in pairs(poiTags) do - if list[obj:Find(k)] then - v = obj:Find(k) -- k/v are the OSM tag pair + if list[Find(k)] then + v = Find(k) -- k/v are the OSM tag pair class = poiClasses[v] or k rank = poiClassRanks[class] or 25 subclassKey = poiSubClasses[v] if subclassKey then class = v - v = obj:Find(subclassKey) + v = Find(subclassKey) end return rank, class, v end end -- Catch-all for shops - local shop = obj:Find("shop") + local shop = Find("shop") if shop~="" then return poiClassRanks['shop'], "shop", shop end -- Nothing found return nil,nil,nil end -function SetBuildingHeightAttributes(way) - local height = tonumber(way:Find("height"), 10) - local minHeight = tonumber(way:Find("min_height"), 10) - local levels = tonumber(way:Find("building:levels"), 10) - local minLevel = tonumber(way:Find("building:min_level"), 10) +function SetBuildingHeightAttributes() + local height = tonumber(Find("height"), 10) + local minHeight = tonumber(Find("min_height"), 10) + local levels = tonumber(Find("building:levels"), 10) + local minLevel = tonumber(Find("building:min_level"), 10) local renderHeight = BUILDING_FLOOR_HEIGHT if height or levels then @@ -779,17 +779,17 @@ function SetBuildingHeightAttributes(way) renderHeight = renderHeight + renderMinHeight end - way:AttributeNumeric("render_height", renderHeight) - way:AttributeNumeric("render_min_height", renderMinHeight) + AttributeNumeric("render_height", renderHeight) + AttributeNumeric("render_min_height", renderMinHeight) end -- Implement z_order as calculated by Imposm -- See https://imposm.org/docs/imposm3/latest/mapping.html#wayzorder for details. 
-function SetZOrder(way) - local highway = way:Find("highway") - local layer = tonumber(way:Find("layer")) - local bridge = way:Find("bridge") - local tunnel = way:Find("tunnel") +function SetZOrder() + local highway = Find("highway") + local layer = tonumber(Find("layer")) + local bridge = Find("bridge") + local tunnel = Find("tunnel") local zOrder = 0 if bridge ~= "" and bridge ~= "no" then zOrder = zOrder + 10 @@ -820,7 +820,7 @@ function SetZOrder(way) hwClass = 3 end zOrder = zOrder + hwClass - way:ZOrder(zOrder) + ZOrder(zOrder) end -- ========================================================== diff --git a/src/osm_lua_processing.cpp b/src/osm_lua_processing.cpp index f94d6734..51e084cd 100644 --- a/src/osm_lua_processing.cpp +++ b/src/osm_lua_processing.cpp @@ -10,6 +10,31 @@ using namespace std; thread_local kaguya::State *g_luaState = nullptr; +thread_local OsmLuaProcessing* osmLuaProcessing = nullptr; + +std::string rawId() { return osmLuaProcessing->Id(); } +bool rawHolds(const std::string& key) { return osmLuaProcessing->Holds(key); } +const std::string& rawFind(const std::string& key) { return osmLuaProcessing->Find(key); } +std::vector rawFindIntersecting(const std::string &layerName) { return osmLuaProcessing->FindIntersecting(layerName); } +bool rawIntersects(const std::string& layerName) { return osmLuaProcessing->Intersects(layerName); } +std::vector rawFindCovering(const std::string& layerName) { return osmLuaProcessing->FindCovering(layerName); } +bool rawCoveredBy(const std::string& layerName) { return osmLuaProcessing->CoveredBy(layerName); } +bool rawIsClosed() { return osmLuaProcessing->IsClosed(); } +double rawArea() { return osmLuaProcessing->Area(); } +double rawLength() { return osmLuaProcessing->Length(); } +std::vector Centroid() { return osmLuaProcessing->Centroid(); } +void rawLayer(const std::string& layerName, bool area) { return osmLuaProcessing->Layer(layerName, area); } +void rawLayerAsCentroid(const std::string &layerName) { 
return osmLuaProcessing->LayerAsCentroid(layerName); } +void rawMinZoom(const double z) { return osmLuaProcessing->MinZoom(z); } +void rawZOrder(const double z) { return osmLuaProcessing->ZOrder(z); } +kaguya::optional rawNextRelation() { return osmLuaProcessing->NextRelation(); } +void rawRestartRelations() { return osmLuaProcessing->RestartRelations(); } +std::string rawFindInRelation(const std::string& key) { return osmLuaProcessing->FindInRelation(key); } +void rawAccept() { return osmLuaProcessing->Accept(); } +double rawAreaIntersecting(const std::string& layerName) { return osmLuaProcessing->AreaIntersecting(layerName); } +std::vector rawCentroid() { return osmLuaProcessing->Centroid(); } + + bool supportsRemappingShapefiles = false; const std::string EMPTY_STRING = ""; @@ -45,31 +70,41 @@ OsmLuaProcessing::OsmLuaProcessing( g_luaState = &luaState; luaState.setErrorHandler(lua_error_handler); luaState.dofile(luaFile.c_str()); - luaState["OSM"].setClass(kaguya::UserdataMetatable() - .addFunction("Id", &OsmLuaProcessing::Id) - .addFunction("Holds", &OsmLuaProcessing::Holds) - .addFunction("Find", &OsmLuaProcessing::Find) - .addFunction("FindIntersecting", &OsmLuaProcessing::FindIntersecting) - .addFunction("Intersects", &OsmLuaProcessing::Intersects) - .addFunction("FindCovering", &OsmLuaProcessing::FindCovering) - .addFunction("CoveredBy", &OsmLuaProcessing::CoveredBy) - .addFunction("IsClosed", &OsmLuaProcessing::IsClosed) - .addFunction("Area", &OsmLuaProcessing::Area) - .addFunction("AreaIntersecting", &OsmLuaProcessing::AreaIntersecting) - .addFunction("Length", &OsmLuaProcessing::Length) - .addFunction("Centroid", &OsmLuaProcessing::Centroid) - .addFunction("Layer", &OsmLuaProcessing::Layer) - .addFunction("LayerAsCentroid", &OsmLuaProcessing::LayerAsCentroid) - .addOverloadedFunctions("Attribute", &OsmLuaProcessing::Attribute, &OsmLuaProcessing::AttributeWithMinZoom) - .addOverloadedFunctions("AttributeNumeric", &OsmLuaProcessing::AttributeNumeric, 
&OsmLuaProcessing::AttributeNumericWithMinZoom) - .addOverloadedFunctions("AttributeBoolean", &OsmLuaProcessing::AttributeBoolean, &OsmLuaProcessing::AttributeBooleanWithMinZoom) - .addFunction("MinZoom", &OsmLuaProcessing::MinZoom) - .addFunction("ZOrder", &OsmLuaProcessing::ZOrder) - .addFunction("Accept", &OsmLuaProcessing::Accept) - .addFunction("NextRelation", &OsmLuaProcessing::NextRelation) - .addFunction("RestartRelations", &OsmLuaProcessing::RestartRelations) - .addFunction("FindInRelation", &OsmLuaProcessing::FindInRelation) + + osmLuaProcessing = this; + luaState["Id"] = &rawId; + luaState["Holds"] = &rawHolds; + luaState["Find"] = &rawFind; + luaState["FindIntersecting"] = &rawFindIntersecting; + luaState["Intersects"] = &rawIntersects; + luaState["FindCovering"] = &rawFindCovering; + luaState["CoveredBy"] = &rawCoveredBy; + luaState["IsClosed"] = &rawIsClosed; + luaState["Area"] = &rawArea; + luaState["AreaIntersecting"] = &rawAreaIntersecting; + luaState["Length"] = &rawLength; + luaState["Centroid"] = &rawCentroid; + luaState["Layer"] = &rawLayer; + luaState["LayerAsCentroid"] = &rawLayerAsCentroid; + luaState["Attribute"] = kaguya::overload( + [](const std::string &key, const std::string& val) { osmLuaProcessing->Attribute(key, val); }, + [](const std::string &key, const std::string& val, const char minzoom) { osmLuaProcessing->AttributeWithMinZoom(key, val, minzoom); } ); + luaState["AttributeNumeric"] = kaguya::overload( + [](const std::string &key, const float val) { osmLuaProcessing->AttributeNumeric(key, val); }, + [](const std::string &key, const float val, const char minzoom) { osmLuaProcessing->AttributeNumericWithMinZoom(key, val, minzoom); } + ); + luaState["AttributeBoolean"] = kaguya::overload( + [](const std::string &key, const bool val) { osmLuaProcessing->AttributeBoolean(key, val); }, + [](const std::string &key, const bool val, const char minzoom) { osmLuaProcessing->AttributeBooleanWithMinZoom(key, val, minzoom); } + ); + + 
luaState["MinZoom"] = &rawMinZoom; + luaState["ZOrder"] = &rawZOrder; + luaState["Accept"] = &rawAccept; + luaState["NextRelation"] = &rawNextRelation; + luaState["RestartRelations"] = &rawRestartRelations; + luaState["FindInRelation"] = &rawFindInRelation; supportsRemappingShapefiles = !!luaState["attribute_function"]; supportsReadingRelations = !!luaState["relation_scan_function"]; supportsWritingRelations = !!luaState["relation_function"]; @@ -578,7 +613,7 @@ bool OsmLuaProcessing::scanRelation(WayID id, const tag_map_t &tags) { isRelation = true; currentTags = &tags; try { - luaState["relation_scan_function"](this); + luaState["relation_scan_function"](); } catch(luaProcessingException &e) { std::cerr << "Lua error on scanning relation " << originalOsmID << std::endl; exit(1); @@ -605,7 +640,7 @@ void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const tag_map_t &tags) { //Start Lua processing for node try { - luaState["node_function"](this); + luaState["node_function"](); } catch(luaProcessingException &e) { std::cerr << "Lua error on node " << originalOsmID << std::endl; exit(1); @@ -654,7 +689,7 @@ bool OsmLuaProcessing::setWay(WayID wayId, LatpLonVec const &llVec, const tag_ma //Start Lua processing for way try { kaguya::LuaFunction way_function = luaState["way_function"]; - kaguya::LuaRef ret = way_function(this); + kaguya::LuaRef ret = way_function(); assert(!ret); } catch(luaProcessingException &e) { std::cerr << "Lua error on way " << originalOsmID << std::endl; @@ -688,7 +723,7 @@ void OsmLuaProcessing::setRelation(int64_t relationId, WayVec const &outerWayVec // Start Lua processing for relation if (!isNativeMP && !supportsWritingRelations) return; try { - luaState[isNativeMP ? "way_function" : "relation_function"](this); + luaState[isNativeMP ? 
"way_function" : "relation_function"](); } catch(luaProcessingException &e) { std::cerr << "Lua error on relation " << originalOsmID << std::endl; exit(1); From c4518f3ccaf87d091542a2ca695499c957cf11a1 Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Sun, 3 Dec 2023 17:16:32 -0500 Subject: [PATCH 2/7] faster tag map, faster Find()/Holds(), avoid mallocs Cherry-picked from https://github.com/systemed/tilemaker/pull/604/commits/b3221667a9d2366410dbfdc7f25f3062d7a135ef, https://github.com/systemed/tilemaker/pull/604/commits/5c807a9841b866c6dc403141effd4c9d14459034, https://github.com/systemed/tilemaker/pull/604/commits/13b3465f1c80052aa2d622e3915af08b8c5eae9a and fixed up to work with protozero's data_view structure. Original commit messages below, the timings will vary but the idea is the same: Faster tagmap ===== Building a std::map for tags is somewhat expensive, especially when we know that the number of tags is usually quite small. Instead, use a custom structure that does a crappy-but-fast hash to put the keys/values in one of 16 buckets, then linear search the bucket. For GB, before: ``` real 1m11.507s user 16m49.604s sys 0m17.381s ``` After: ``` real 1m9.557s user 16m28.826s sys 0m17.937s ``` Saving 2 seconds of wall clock and 20 seconds of user time doesn't seem like much, but (a) it's not nothing and (b) having the tags in this format will enable us to thwart some of Lua's defensive copies in a subsequent commit. A note about the hash function: hashing each letter of the string using boost::hash_combine eliminated the time savings. Faster Find()/Holds() ===== We (ab?)use kaguya's parameter serialization machinery. Rather than take a `std::string`, we take a `KnownTagKey` and teach Lua how to convert a Lua string into a `KnownTagKey`. This avoids the need to do a defensive copy of the string when coming from Lua. 
It provides a modest boost: ``` real 1m8.859s user 16m13.292s sys 0m18.104s ``` Most keys are short enough to fit in the small-string optimization, so this doesn't help us avoid mallocs. An exception is `addr:housenumber`, which, at 16 bytes, exceeds g++'s limit of 15 bytes. It should be possible to also apply a similar trick to the `Attribute(...)` functions, to avoid defensive copies of strings that we've seen as keys or values. avoid malloc for Attribute with long strings ===== After: ``` real 1m8.124s user 16m6.620s sys 0m16.808s ``` Looks like we're solidly into diminishing returns at this point. --- CMakeLists.txt | 1 + Makefile | 1 + include/osm_lua_processing.h | 31 +++++--- include/pbf_processor.h | 9 +-- include/tag_map.h | 56 +++++++++++++++ src/osm_lua_processing.cpp | 127 +++++++++++++++++++++++++------- src/pbf_processor.cpp | 22 +++--- src/tag_map.cpp | 135 +++++++++++++++++++++++++++++++++++ 8 files changed, 331 insertions(+), 51 deletions(-) create mode 100644 include/tag_map.h create mode 100644 src/tag_map.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 04193c4d..423980c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,6 +96,7 @@ file(GLOB tilemaker_src_files src/shp_mem_tiles.cpp src/sorted_node_store.cpp src/sorted_way_store.cpp + src/tag_map.cpp src/tile_data.cpp src/tilemaker.cpp src/tile_worker.cpp diff --git a/Makefile b/Makefile index 0b3db1de..d98b5df4 100644 --- a/Makefile +++ b/Makefile @@ -120,6 +120,7 @@ tilemaker: \ src/shp_mem_tiles.o \ src/sorted_node_store.o \ src/sorted_way_store.o \ + src/tag_map.o \ src/tile_data.o \ src/tilemaker.o \ src/tile_worker.o \ diff --git a/include/osm_lua_processing.h b/include/osm_lua_processing.h index 54a939a4..8df5296d 100644 --- a/include/osm_lua_processing.h +++ b/include/osm_lua_processing.h @@ -17,6 +17,8 @@ #include +class TagMap; + // Lua extern "C" { #include "lua.h" @@ -32,6 +34,19 @@ extern bool verbose; class AttributeStore; class AttributeSet; +// A string, which might 
be in `currentTags` as a value. If Lua +// code refers to an absent value, it'll fallback to passing +// it as a std::string. +// +// The intent is that Attribute("name", Find("name")) is a common +// pattern, and we ought to avoid marshalling a string back and +// forth from C++ to Lua when possible. +struct PossiblyKnownTagValue { + bool found; + uint32_t index; + std::string fallback; +}; + /** \brief OsmLuaProcessing - converts OSM objects into OutputObjects. @@ -75,31 +90,25 @@ class OsmLuaProcessing { using tag_map_t = boost::container::flat_map; // Scan non-MP relation - bool scanRelation(WayID id, const tag_map_t &tags); + bool scanRelation(WayID id, const TagMap& tags); /// \brief We are now processing a significant node - void setNode(NodeID id, LatpLon node, const tag_map_t &tags); + void setNode(NodeID id, LatpLon node, const TagMap& tags); /// \brief We are now processing a way - bool setWay(WayID wayId, LatpLonVec const &llVec, const tag_map_t &tags); + bool setWay(WayID wayId, LatpLonVec const &llVec, const TagMap& tags); /** \brief We are now processing a relation * (note that we store relations as ways with artificial IDs, and that * we use decrementing positive IDs to give a bit more space for way IDs) */ - void setRelation(int64_t relationId, WayVec const &outerWayVec, WayVec const &innerWayVec, const tag_map_t &tags, bool isNativeMP, bool isInnerOuter); + void setRelation(int64_t relationId, WayVec const &outerWayVec, WayVec const &innerWayVec, const TagMap& tags, bool isNativeMP, bool isInnerOuter); // ---- Metadata queries called from Lua // Get the ID of the current object std::string Id() const; - // Check if there's a value for a given key - bool Holds(const std::string& key) const; - - // Get an OSM tag for a given key (or return empty string if none) - const std::string Find(const std::string& key) const; - // ---- Spatial queries called from Lua // Find intersecting shapefile layer @@ -200,6 +209,7 @@ class OsmLuaProcessing { inline 
AttributeStore &getAttributeStore() { return attributeStore; } struct luaProcessingException :std::exception {}; + const TagMap* currentTags; private: /// Internal: clear current cached state @@ -259,7 +269,6 @@ class OsmLuaProcessing { class LayerDefinition &layers; std::vector> outputs; // All output objects that have been created - const boost::container::flat_map* currentTags; std::vector finalizeOutputs(); diff --git a/include/pbf_processor.h b/include/pbf_processor.h index d79d1ca5..74e0ce35 100644 --- a/include/pbf_processor.h +++ b/include/pbf_processor.h @@ -9,6 +9,7 @@ #include #include "osm_store.h" #include "pbf_reader.h" +#include "tag_map.h" #include class OsmLuaProcessing; @@ -62,16 +63,12 @@ class PbfProcessor ); // Read tags into a map from a way/node/relation - using tag_map_t = boost::container::flat_map; template - void readTags(T& pbfObject, const PbfReader::PrimitiveBlock& pb, tag_map_t& tags) { - tags.reserve(pbfObject.keys.size()); + void readTags(T &pbfObject, PbfReader::PrimitiveBlock const &pb, TagMap& tags) { for (uint n=0; n < pbfObject.keys.size(); n++) { auto keyIndex = pbfObject.keys[n]; auto valueIndex = pbfObject.vals[n]; - protozero::data_view key = pb.stringTable[keyIndex]; - protozero::data_view value = pb.stringTable[valueIndex]; - tags[key] = value; + tags.addTag(pb.stringTable[keyIndex], pb.stringTable[valueIndex]); } } diff --git a/include/tag_map.h b/include/tag_map.h new file mode 100644 index 00000000..f951d9e9 --- /dev/null +++ b/include/tag_map.h @@ -0,0 +1,56 @@ +#ifndef _TAG_MAP_H +#define _TAG_MAP_H + +#include +#include +#include +#include + +// We track tags in a special structure, which enables some tricks when +// doing Lua interop. +// +// The alternative is a std::map - but often, our map is quite small. +// It's preferable to have a small set of vectors and do linear search. 
+// +// Further, we can avoid passing std::string from Lua -> C++ in some cases +// by first checking to see if the string we would have passed is already +// stored in our tag map, and passing a reference to its location. + +// Assumptions: +// 1. Not thread-safe +// This is OK because we have 1 instance of OsmLuaProcessing per thread. +// 2. Lifetime of map is less than lifetime of keys/values that are passed +// This is true since the strings are owned by the protobuf block reader +// 3. Max number of tag values will fit in a short +// OSM limit is 5,000 tags per object +class TagMap { +public: + TagMap(); + void reset(); + + void addTag(const protozero::data_view& key, const protozero::data_view& value); + + // Return -1 if key not found, else return its keyLoc. + int64_t getKey(const char* key, size_t size) const; + + // Return -1 if value not found, else return its valueLoc. + int64_t getValue(const char* key, size_t size) const; + + const protozero::data_view* getValueFromKey(uint32_t keyLoc) const; + const protozero::data_view* getValue(uint32_t valueLoc) const; + + boost::container::flat_map exportToBoostMap() const; + +private: + uint32_t ensureString( + std::vector>& vector, + const protozero::data_view& value + ); + + + std::vector> keys; + std::vector> key2value; + std::vector> values; +}; + +#endif _TAG_MAP_H diff --git a/src/osm_lua_processing.cpp b/src/osm_lua_processing.cpp index 51e084cd..969993cb 100644 --- a/src/osm_lua_processing.cpp +++ b/src/osm_lua_processing.cpp @@ -5,16 +5,108 @@ #include "helpers.h" #include "coordinates_geom.h" #include "osm_mem_tiles.h" - +#include "tag_map.h" using namespace std; +const std::string EMPTY_STRING = ""; thread_local kaguya::State *g_luaState = nullptr; thread_local OsmLuaProcessing* osmLuaProcessing = nullptr; +// A key in `currentTags`. If Lua code refers to an absent key, +// found will be false.
+struct KnownTagKey { + bool found; + uint32_t index; +}; + +template<> struct kaguya::lua_type_traits { + typedef KnownTagKey get_type; + typedef const KnownTagKey& push_type; + + static bool strictCheckType(lua_State* l, int index) + { + return lua_type(l, index) == LUA_TSTRING; + } + static bool checkType(lua_State* l, int index) + { + return lua_isstring(l, index) != 0; + } + static get_type get(lua_State* l, int index) + { + KnownTagKey rv = { false, 0 }; + size_t size = 0; + const char* buffer = lua_tolstring(l, index, &size); + + int64_t tagLoc = osmLuaProcessing->currentTags->getKey(buffer, size); + + if (tagLoc >= 0) { + rv.found = true; + rv.index = tagLoc; + } +// std::string key(buffer, size); +// std::cout << "for key " << key << ": rv.found=" << rv.found << ", rv.index=" << rv.index << std::endl; + return rv; + } + static int push(lua_State* l, push_type s) + { + throw std::runtime_error("Lua code doesn't know how to use KnownTagKey"); + } +}; + +template<> struct kaguya::lua_type_traits { + typedef PossiblyKnownTagValue get_type; + typedef const PossiblyKnownTagValue& push_type; + + static bool strictCheckType(lua_State* l, int index) + { + return lua_type(l, index) == LUA_TSTRING; + } + static bool checkType(lua_State* l, int index) + { + return lua_isstring(l, index) != 0; + } + static get_type get(lua_State* l, int index) + { + PossiblyKnownTagValue rv = { false, 0 }; + size_t size = 0; + const char* buffer = lua_tolstring(l, index, &size); + + // For long strings where we might need to do a malloc, see if we + // can instead pass a pointer to a value from this object's tag + // map. + // + // 15 is the threshold where gcc no longer applies the small string + // optimization. 
+ if (size > 15) { + int64_t tagLoc = osmLuaProcessing->currentTags->getValue(buffer, size); + + if (tagLoc >= 0) { + rv.found = true; + rv.index = tagLoc; + return rv; + } + } + + rv.fallback = std::string(buffer, size); + return rv; + } + static int push(lua_State* l, push_type s) + { + throw std::runtime_error("Lua code doesn't know how to use PossiblyKnownTagValue"); + } +}; + std::string rawId() { return osmLuaProcessing->Id(); } -bool rawHolds(const std::string& key) { return osmLuaProcessing->Holds(key); } -const std::string& rawFind(const std::string& key) { return osmLuaProcessing->Find(key); } +bool rawHolds(const KnownTagKey& key) { return key.found; } +const std::string rawFind(const KnownTagKey& key) { + if (key.found) { + auto value = *(osmLuaProcessing->currentTags->getValueFromKey(key.index)); + return std::string(value.data(), value.size()); + } + + return EMPTY_STRING; +} std::vector rawFindIntersecting(const std::string &layerName) { return osmLuaProcessing->FindIntersecting(layerName); } bool rawIntersects(const std::string& layerName) { return osmLuaProcessing->Intersects(layerName); } std::vector rawFindCovering(const std::string& layerName) { return osmLuaProcessing->FindCovering(layerName); } @@ -36,7 +128,6 @@ std::vector rawCentroid() { return osmLuaProcessing->Centroid(); } bool supportsRemappingShapefiles = false; -const std::string EMPTY_STRING = ""; int lua_error_handler(int errCode, const char *errMessage) { @@ -156,18 +247,6 @@ string OsmLuaProcessing::Id() const { return to_string(originalOsmID); } -// Check if there's a value for a given key -bool OsmLuaProcessing::Holds(const string& key) const { - return currentTags->find(key) != currentTags->end(); -} - -// Get an OSM tag for a given key (or return empty string if none) -const string OsmLuaProcessing::Find(const string& key) const { - auto it = currentTags->find(key); - if(it == currentTags->end()) return EMPTY_STRING; - return std::string(it->second.data(), it->second.size()); 
-} - // ---- Spatial queries called from Lua vector OsmLuaProcessing::FindIntersecting(const string &layerName) { @@ -606,7 +685,7 @@ void OsmLuaProcessing::setVectorLayerMetadata(const uint_least8_t layer, const s // Scan relation (but don't write geometry) // return true if we want it, false if we don't -bool OsmLuaProcessing::scanRelation(WayID id, const tag_map_t &tags) { +bool OsmLuaProcessing::scanRelation(WayID id, const TagMap& tags) { reset(); originalOsmID = id; isWay = false; @@ -620,15 +699,13 @@ bool OsmLuaProcessing::scanRelation(WayID id, const tag_map_t &tags) { } if (!relationAccepted) return false; - boost::container::flat_map m; - for (const auto& i : tags) { - m[std::string(i.first.data(), i.first.size())] = std::string(i.second.data(), i.second.size()); - } - osmStore.store_relation_tags(id, m); + // If we're persisting, we need to make a real map that owns its + // own keys and values. + osmStore.store_relation_tags(id, tags.exportToBoostMap()); return true; } -void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const tag_map_t &tags) { +void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const TagMap& tags) { reset(); originalOsmID = id; @@ -656,7 +733,7 @@ void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const tag_map_t &tags) { } // We are now processing a way -bool OsmLuaProcessing::setWay(WayID wayId, LatpLonVec const &llVec, const tag_map_t &tags) { +bool OsmLuaProcessing::setWay(WayID wayId, LatpLonVec const &llVec, const TagMap& tags) { reset(); wayEmitted = false; originalOsmID = wayId; @@ -706,7 +783,7 @@ bool OsmLuaProcessing::setWay(WayID wayId, LatpLonVec const &llVec, const tag_ma } // We are now processing a relation -void OsmLuaProcessing::setRelation(int64_t relationId, WayVec const &outerWayVec, WayVec const &innerWayVec, const tag_map_t &tags, +void OsmLuaProcessing::setRelation(int64_t relationId, WayVec const &outerWayVec, WayVec const &innerWayVec, const TagMap& tags, bool isNativeMP, // only OSM 
type=multipolygon bool isInnerOuter) { // any OSM relation with "inner" and "outer" roles (e.g. type=multipolygon|boundary) reset(); diff --git a/src/pbf_processor.cpp b/src/pbf_processor.cpp index 0cf0f1d9..a912b8ee 100644 --- a/src/pbf_processor.cpp +++ b/src/pbf_processor.cpp @@ -28,6 +28,8 @@ bool PbfProcessor::ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup { // ---- Read nodes std::vector nodes; + TagMap tags; + for (auto& node : pg.nodes()) { NodeID nodeId = node.id; @@ -45,17 +47,15 @@ bool PbfProcessor::ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup nodes.push_back(std::make_pair(static_cast(nodeId), latplon)); if (significant) { + tags.reset(); // For tagged nodes, call Lua, then save the OutputObject - boost::container::flat_map tags; - tags.reserve((node.tagEnd - node.tagStart) / 2); - for (int n = node.tagStart; n < node.tagEnd; n += 2) { auto keyIndex = pg.translateNodeKeyValue(n); auto valueIndex = pg.translateNodeKeyValue(n + 1); - protozero::data_view key{pb.stringTable[keyIndex].data(), pb.stringTable[keyIndex].size()}; - protozero::data_view value{pb.stringTable[valueIndex].data(), pb.stringTable[valueIndex].size()}; - tags[key] = value; + const protozero::data_view& key = pb.stringTable[keyIndex]; + const protozero::data_view& value = pb.stringTable[valueIndex]; + tags.addTag(key, value); } output.setNode(static_cast(nodeId), latplon, tags); } @@ -84,6 +84,7 @@ bool PbfProcessor::ReadWays( std::vector llWays; std::vector>> nodeWays; + TagMap tags; LatpLonVec llVec; std::vector nodeVec; @@ -131,7 +132,7 @@ bool PbfProcessor::ReadWays( if (llVec.empty()) continue; try { - tag_map_t tags; + tags.reset(); readTags(pbfWay, pb, tags); bool emitted = output.setWay(static_cast(pbfWay.id), llVec, tags); @@ -164,6 +165,8 @@ bool PbfProcessor::ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveG if (pg.relations().empty()) return false; + TagMap tags; + int typeKey = findStringPosition(pb, "type"); int mpKey = 
findStringPosition(pb, "multipolygon"); @@ -173,7 +176,7 @@ bool PbfProcessor::ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveG WayID relid = static_cast(pbfRelation.id); if (!isMultiPolygon) { if (output.canReadRelations()) { - tag_map_t tags; + tags.reset(); readTags(pbfRelation, pb, tags); isAccepted = output.scanRelation(relid, tags); } @@ -202,6 +205,7 @@ bool PbfProcessor::ReadRelations( if (pg.relations().empty()) return false; + TagMap tags; std::vector relations; int typeKey = findStringPosition(pb, "type"); @@ -245,7 +249,7 @@ bool PbfProcessor::ReadRelations( continue; try { - tag_map_t tags; + tags.reset(); readTags(pbfRelation, pb, tags); output.setRelation(pbfRelation.id, outerWayVec, innerWayVec, tags, isMultiPolygon, isInnerOuter); diff --git a/src/tag_map.cpp b/src/tag_map.cpp new file mode 100644 index 00000000..8fc02a96 --- /dev/null +++ b/src/tag_map.cpp @@ -0,0 +1,135 @@ +#include "tag_map.h" +#include +#include + +TagMap::TagMap() { + keys.resize(16); + key2value.resize(16); + values.resize(16); +} + +void TagMap::reset() { + for (int i = 0; i < 16; i++) { + keys[i].clear(); + key2value[i].clear(); + values[i].clear(); + } +} + +const std::size_t hashString(const std::string& str) { + // This is a pretty crappy hash function in terms of bit + // avalanching and distribution of output values. + // + // But it's very good in terms of speed, which turns out + // to be the important measure. + std::size_t hash = str.size(); + if (hash >= 4) + hash ^= *(uint32_t*)str.data(); + + return hash; +} + +const std::size_t hashString(const char* str, size_t size) { + // This is a pretty crappy hash function in terms of bit + // avalanching and distribution of output values. + // + // But it's very good in terms of speed, which turns out + // to be the important measure. 
+ std::size_t hash = size; + if (hash >= 4) + hash ^= *(uint32_t*)str; + + return hash; +} + +uint32_t TagMap::ensureString( + std::vector>& vector, + const protozero::data_view& value +) { + std::size_t hash = hashString(value.data(), value.size()); + + const uint16_t shard = hash % vector.size(); + for (int i = 0; i < vector[shard].size(); i++) + if (*(vector[shard][i]) == value) + return shard << 16 | i; + + vector[shard].push_back(&value); + return shard << 16 | (vector[shard].size() - 1); +} + + +void TagMap::addTag(const protozero::data_view& key, const protozero::data_view& value) { + uint32_t valueLoc = ensureString(values, value); +// std::cout << "valueLoc = " << valueLoc << std::endl; + uint32_t keyLoc = ensureString(keys, key); +// std::cout << "keyLoc = " << keyLoc << std::endl; + + + const uint16_t shard = keyLoc >> 16; + const uint16_t pos = keyLoc; +// std::cout << "shard=" << shard << ", pos=" << pos << std::endl; + if (key2value[shard].size() <= pos) { +// std::cout << "growing shard" << std::endl; + key2value[shard].resize(pos + 1); + } + + key2value[shard][pos] = valueLoc; +} + +int64_t TagMap::getKey(const char* key, size_t size) const { + // Return -1 if key not found, else return its keyLoc. + std::size_t hash = hashString(key, size); + + const uint16_t shard = hash % keys.size(); + for (int i = 0; i < keys[shard].size(); i++) { + const protozero::data_view& candidate = *keys[shard][i]; + if (candidate.size() != size) + continue; + + if (memcmp(candidate.data(), key, size) == 0) + return shard << 16 | i; + } + + return -1; +} + +int64_t TagMap::getValue(const char* value, size_t size) const { + // Return -1 if value not found, else return its valueLoc. 
+ std::size_t hash = hashString(value, size); + + const uint16_t shard = hash % values.size(); + for (int i = 0; i < values[shard].size(); i++) { + const protozero::data_view& candidate = *values[shard][i]; + if (candidate.size() != size) + continue; + + if (memcmp(candidate.data(), value, size) == 0) + return shard << 16 | i; + } + + return -1; +} + +const protozero::data_view* TagMap::getValueFromKey(uint32_t keyLoc) const { + const uint32_t valueLoc = key2value[keyLoc >> 16][keyLoc & 0xFFFF]; + return values[valueLoc >> 16][valueLoc & 0xFFFF]; +} + +const protozero::data_view* TagMap::getValue(uint32_t valueLoc) const { + return values[valueLoc >> 16][valueLoc & 0xFFFF]; +} + +boost::container::flat_map TagMap::exportToBoostMap() const { + boost::container::flat_map rv; + + for (int i = 0; i < keys.size(); i++) { + for (int j = 0; j < keys[i].size(); j++) { + uint32_t valueLoc = key2value[i][j]; + auto key = *keys[i][j]; + auto value = *values[valueLoc >> 16][valueLoc & 0xFFFF]; + rv[std::string(key.data(), key.size())] = std::string(value.data(), value.size()); + } + } + + return rv; +} From 89f43ea7f3c7f71fa35226f4e9d13639697cdca6 Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Mon, 25 Dec 2023 09:32:50 -0500 Subject: [PATCH 3/7] try to avoid lock contention on AttributeStore On a 48-core machine, I still see lots of lock contention. AttributeStore::add is one place. Add a thread-local cache that can be consulted without taking the shared lock. The intuition here is that there are 1.3B objects, and 40M attribute sets. Thus, on average, an attribute set is reused 32 times. However, the average is probably misleading -- the distribution is likely not uniform, e.g. the median attribute set is probably reused 1-2 times, and some exceptional attribute sets (e.g. `natural=tree`) are reused thousands of times. For GB on a 16-core machine, this avoids 27M of 36M locks.
--- include/attribute_store.h | 1 + include/deque_map.h | 6 +++++- src/attribute_store.cpp | 44 +++++++++++++++++++++++++++++++++------ test/deque_map.test.cpp | 4 ++++ 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/include/attribute_store.h b/include/attribute_store.h index 3aea19cf..95c4cabb 100644 --- a/include/attribute_store.h +++ b/include/attribute_store.h @@ -418,6 +418,7 @@ struct AttributeStore { mutable std::vector setsMutex; mutable std::mutex mutex; + std::atomic lookupsUncached; std::atomic lookups; }; diff --git a/include/deque_map.h b/include/deque_map.h index bcb4ddbc..ea57f669 100644 --- a/include/deque_map.h +++ b/include/deque_map.h @@ -95,7 +95,11 @@ class DequeMap { return -1; } - const T& at(uint32_t index) const { + inline const T& operator[](uint32_t index) const { + return objects[index]; + } + + inline const T& at(uint32_t index) const { return objects.at(index); } diff --git a/src/attribute_store.cpp b/src/attribute_store.cpp index 6fbacbe9..7ce784b8 100644 --- a/src/attribute_store.cpp +++ b/src/attribute_store.cpp @@ -73,18 +73,18 @@ const AttributePair& AttributePairStore::getPair(uint32_t i) const { if (shard == 0) { if (offset < tlsHotShard.size()) - return tlsHotShard.at(offset); + return tlsHotShard[offset]; { std::lock_guard lock(pairsMutex[0]); tlsHotShard = pairs[0]; } - return tlsHotShard.at(offset); + return tlsHotShard[offset]; } std::lock_guard lock(pairsMutex[shard]); - return pairs[shard].at(offset); + return pairs[shard][offset]; }; const AttributePair& AttributePairStore::getPairUnsafe(uint32_t i) const { @@ -94,7 +94,7 @@ const AttributePair& AttributePairStore::getPairUnsafe(uint32_t i) const { uint32_t shard = i >> (32 - SHARD_BITS); uint32_t offset = i & (~(~0u << (32 - SHARD_BITS))); - return pairs[shard].at(offset); + return pairs[shard][offset]; }; uint32_t AttributePairStore::addPair(AttributePair& pair, bool isHot) { @@ -263,25 +263,54 @@ void AttributeSet::finalize() { } +// Remember recently 
queried/added sets so that we can return them in the +// future without taking a lock. +thread_local std::vector cachedAttributeSetPointers(64); +thread_local std::vector cachedAttributeSetIndexes(64); + +thread_local uint64_t tlsLookups = 0; +thread_local uint64_t tlsLookupsUncached = 0; AttributeIndex AttributeStore::add(AttributeSet &attributes) { // TODO: there's probably a way to use C++ types to distinguish a finalized // and non-finalized AttributeSet, which would make this safer. attributes.finalize(); size_t hash = attributes.hash(); + + const size_t candidateIndex = hash % cachedAttributeSetPointers.size(); + // Before taking a lock, see if we've seen this attribute set recently. + + tlsLookups++; + if (tlsLookups % 1024 == 0) { + lookups += 1024; + } + + + { + const AttributeSet* candidate = cachedAttributeSetPointers[candidateIndex]; + + if (candidate != nullptr && *candidate == attributes) + return cachedAttributeSetIndexes[candidateIndex]; + } + size_t shard = hash % ATTRIBUTE_SHARDS; // We can't use the top 2 bits (see OutputObject's bitfields) shard = shard >> 2; std::lock_guard lock(setsMutex[shard]); - lookups++; + tlsLookupsUncached++; + if (tlsLookupsUncached % 1024 == 0) + lookupsUncached += 1024; const uint32_t offset = sets[shard].add(attributes); if (offset >= (1 << (32 - SHARD_BITS))) throw std::out_of_range("set shard overflow"); uint32_t rv = (shard << (32 - SHARD_BITS)) + offset; + + cachedAttributeSetPointers[candidateIndex] = &sets[shard][offset]; + cachedAttributeSetIndexes[candidateIndex] = rv; return rv; } @@ -317,7 +346,7 @@ size_t AttributeStore::size() const { } void AttributeStore::reportSize() const { - std::cout << "Attributes: " << size() << " sets from " << lookups.load() << " objects" << std::endl; + std::cout << "Attributes: " << size() << " sets from " << lookups.load() << " objects (" << lookupsUncached.load() << " uncached)" << std::endl; // Print detailed histogram of frequencies of attributes. 
if (false) { @@ -380,6 +409,9 @@ void AttributeStore::reset() { tlsKeys2IndexSize = 0; tlsHotShard.clear(); + + for (int i = 0; i < cachedAttributeSetPointers.size(); i++) + cachedAttributeSetPointers[i] = nullptr; } void AttributeStore::finalize() { diff --git a/test/deque_map.test.cpp b/test/deque_map.test.cpp index 23a3d3cc..28023542 100644 --- a/test/deque_map.test.cpp +++ b/test/deque_map.test.cpp @@ -25,9 +25,13 @@ MU_TEST(test_deque_map) { mu_check(strs.size() == 4); mu_check(strs.at(0) == "foo"); + mu_check(strs[0] == "foo"); mu_check(strs.at(1) == "bar"); + mu_check(strs[1] == "bar"); mu_check(strs.at(2) == "aardvark"); + mu_check(strs[2] == "aardvark"); mu_check(strs.at(3) == "quux"); + mu_check(strs[3] == "quux"); std::vector rv; for (std::string x : strs) { From c87497dfa2149413a94f08de8d776dec7578af04 Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Mon, 25 Dec 2023 09:58:25 -0500 Subject: [PATCH 4/7] RelationScanStore: more granular locks On a 48-core machine, this phase currently achieves only 400% CPU usage, I think due to these locks --- include/osm_store.h | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/include/osm_store.h b/include/osm_store.h index b8607543..5bb74272 100644 --- a/include/osm_store.h +++ b/include/osm_store.h @@ -81,37 +81,39 @@ class RelationScanStore { private: using tag_map_t = boost::container::flat_map; - std::map> relationsForWays; - std::map relationTags; - mutable std::mutex mutex; + std::vector>> relationsForWays; + std::vector> relationTags; + mutable std::vector mutex; public: + RelationScanStore(): relationsForWays(128), relationTags(128), mutex(128) {} void relation_contains_way(WayID relid, WayID wayid) { - std::lock_guard lock(mutex); - relationsForWays[wayid].emplace_back(relid); + const size_t shard = wayid % mutex.size(); + + std::lock_guard lock(mutex[shard]); + relationsForWays[shard][wayid].emplace_back(relid); } void store_relation_tags(WayID relid, 
const tag_map_t &tags) { - std::lock_guard lock(mutex); - relationTags[relid] = tags; + const size_t shard = relid % mutex.size(); + std::lock_guard lock(mutex[shard]); + relationTags[shard][relid] = tags; } bool way_in_any_relations(WayID wayid) { - return relationsForWays.find(wayid) != relationsForWays.end(); + const size_t shard = wayid % mutex.size(); + return relationsForWays[shard].find(wayid) != relationsForWays[shard].end(); } std::vector relations_for_way(WayID wayid) { - return relationsForWays[wayid]; + const size_t shard = wayid % mutex.size(); + return relationsForWays[shard][wayid]; } std::string get_relation_tag(WayID relid, const std::string &key) { - auto it = relationTags.find(relid); - if (it==relationTags.end()) return ""; + const size_t shard = relid % mutex.size(); + auto it = relationTags[shard].find(relid); + if (it==relationTags[shard].end()) return ""; auto jt = it->second.find(key); if (jt==it->second.end()) return ""; return jt->second; } - void clear() { - std::lock_guard lock(mutex); - relationsForWays.clear(); - relationTags.clear(); - } }; From f6807c4a2c4f9f928b5357e19d2d24cc956b9bc7 Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Mon, 25 Dec 2023 14:17:29 -0500 Subject: [PATCH 5/7] move duplicate attribute handling outside of locks --- include/osm_lua_processing.h | 3 +++ src/attribute_store.cpp | 3 --- src/osm_lua_processing.cpp | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/osm_lua_processing.h b/include/osm_lua_processing.h index 8df5296d..0ef1861d 100644 --- a/include/osm_lua_processing.h +++ b/include/osm_lua_processing.h @@ -227,6 +227,8 @@ class OsmLuaProcessing { lastStoredGeometryId = 0; } + void removeAttributeIfNeeded(const std::string& key); + const inline Point getPoint() { return Point(lon/10000000.0,latp/10000000.0); } @@ -269,6 +271,7 @@ class OsmLuaProcessing { class LayerDefinition &layers; std::vector> outputs; // All output objects that have been created + 
std::vector outputKeys; std::vector finalizeOutputs(); diff --git a/src/attribute_store.cpp b/src/attribute_store.cpp index 7ce784b8..3d9ad4e6 100644 --- a/src/attribute_store.cpp +++ b/src/attribute_store.cpp @@ -210,19 +210,16 @@ void AttributeStore::addAttribute(AttributeSet& attributeSet, std::string const PooledString ps(&v); AttributePair kv(keyStore.key2index(key), ps, minzoom); bool isHot = AttributePair::isHot(key, v); - attributeSet.removePairWithKey(pairStore, kv.keyIndex); attributeSet.addPair(pairStore.addPair(kv, isHot)); } void AttributeStore::addAttribute(AttributeSet& attributeSet, std::string const &key, bool v, char minzoom) { AttributePair kv(keyStore.key2index(key),v,minzoom); bool isHot = true; // All bools are eligible to be hot pairs - attributeSet.removePairWithKey(pairStore, kv.keyIndex); attributeSet.addPair(pairStore.addPair(kv, isHot)); } void AttributeStore::addAttribute(AttributeSet& attributeSet, std::string const &key, float v, char minzoom) { AttributePair kv(keyStore.key2index(key),v,minzoom); bool isHot = v >= 0 && v <= 25 && ceil(v) == v; // Whole numbers in 0..25 are eligible to be hot pairs - attributeSet.removePairWithKey(pairStore, kv.keyIndex); attributeSet.addPair(pairStore.addPair(kv, isHot)); } diff --git a/src/osm_lua_processing.cpp b/src/osm_lua_processing.cpp index 969993cb..072f8359 100644 --- a/src/osm_lua_processing.cpp +++ b/src/osm_lua_processing.cpp @@ -441,6 +441,7 @@ const MultiPolygon &OsmLuaProcessing::multiPolygonCached() { // Add object to specified layer from Lua void OsmLuaProcessing::Layer(const string &layerName, bool area) { + outputKeys.clear(); if (layers.layerMap.count(layerName) == 0) { throw out_of_range("ERROR: Layer(): a layer named as \"" + layerName + "\" doesn't exist."); } @@ -557,6 +558,7 @@ void OsmLuaProcessing::Layer(const string &layerName, bool area) { } void OsmLuaProcessing::LayerAsCentroid(const string &layerName) { + outputKeys.clear(); if (layers.layerMap.count(layerName) == 0) { 
throw out_of_range("ERROR: LayerAsCentroid(): a layer named as \"" + layerName + "\" doesn't exist."); } @@ -628,11 +630,25 @@ void OsmLuaProcessing::Accept() { relationAccepted = true; } +void OsmLuaProcessing::removeAttributeIfNeeded(const string& key) { + // Does it exist? + for (int i = 0; i < outputKeys.size(); i++) { + if (outputKeys[i] == key) { + AttributeSet& set = outputs.back().second; + set.removePairWithKey(attributeStore.pairStore, attributeStore.keyStore.key2index(key)); + return; + } + } + + outputKeys.push_back(key); +} + // Set attributes in a vector tile's Attributes table void OsmLuaProcessing::Attribute(const string &key, const string &val) { AttributeWithMinZoom(key,val,0); } void OsmLuaProcessing::AttributeWithMinZoom(const string &key, const string &val, const char minzoom) { if (val.size()==0) { return; } // don't set empty strings if (outputs.size()==0) { ProcessingError("Can't add Attribute if no Layer set"); return; } + removeAttributeIfNeeded(key); attributeStore.addAttribute(outputs.back().second, key, val, minzoom); setVectorLayerMetadata(outputs.back().first.layer, key, 0); } @@ -640,6 +656,7 @@ void OsmLuaProcessing::AttributeWithMinZoom(const string &key, const string &val void OsmLuaProcessing::AttributeNumeric(const string &key, const float val) { AttributeNumericWithMinZoom(key,val,0); } void OsmLuaProcessing::AttributeNumericWithMinZoom(const string &key, const float val, const char minzoom) { if (outputs.size()==0) { ProcessingError("Can't add Attribute if no Layer set"); return; } + removeAttributeIfNeeded(key); attributeStore.addAttribute(outputs.back().second, key, val, minzoom); setVectorLayerMetadata(outputs.back().first.layer, key, 1); } @@ -647,6 +664,7 @@ void OsmLuaProcessing::AttributeNumericWithMinZoom(const string &key, const floa void OsmLuaProcessing::AttributeBoolean(const string &key, const bool val) { AttributeBooleanWithMinZoom(key,val,0); } void OsmLuaProcessing::AttributeBooleanWithMinZoom(const string 
&key, const bool val, const char minzoom) { if (outputs.size()==0) { ProcessingError("Can't add Attribute if no Layer set"); return; } + removeAttributeIfNeeded(key); attributeStore.addAttribute(outputs.back().second, key, val, minzoom); setVectorLayerMetadata(outputs.back().first.layer, key, 2); } From 515a0211e0e6c1bfb15d8bccb4aa933586888ffc Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Mon, 25 Dec 2023 16:01:30 -0500 Subject: [PATCH 6/7] add thread-local cache for attributepairs --- include/attribute_store.h | 13 ++++++--- src/attribute_store.cpp | 55 ++++++++++++++++++++++++++++++++------- 2 files changed, 56 insertions(+), 12 deletions(-) diff --git a/include/attribute_store.h b/include/attribute_store.h index 95c4cabb..6f11ba00 100644 --- a/include/attribute_store.h +++ b/include/attribute_store.h @@ -183,11 +183,14 @@ struct AttributePair { #define SHARD_BITS 14 #define ATTRIBUTE_SHARDS (1 << SHARD_BITS) +class AttributeStore; class AttributePairStore { public: AttributePairStore(): finalized(false), - pairsMutex(ATTRIBUTE_SHARDS) + pairsMutex(ATTRIBUTE_SHARDS), + lookups(0), + lookupsUncached(0) { // The "hot" shard has a capacity of 64K, the others are unbounded. pairs.push_back(DequeMap(1 << 16)); @@ -202,9 +205,10 @@ class AttributePairStore { const AttributePair& getPairUnsafe(uint32_t i) const; uint32_t addPair(AttributePair& pair, bool isHot); - std::vector> pairs; private: + friend class AttributeStore; + std::vector> pairs; bool finalized; // We refer to all attribute pairs by index. // @@ -214,6 +218,8 @@ class AttributePairStore { // we suspect will be popular. It only ever has 64KB items, // so that we can reference it with a short. 
mutable std::vector pairsMutex; + std::atomic lookupsUncached; + std::atomic lookups; }; // AttributeSet is a set of AttributePairs @@ -406,7 +412,8 @@ struct AttributeStore { finalized(false), sets(ATTRIBUTE_SHARDS), setsMutex(ATTRIBUTE_SHARDS), - lookups(0) { + lookups(0), + lookupsUncached(0) { } AttributeKeyStore keyStore; diff --git a/src/attribute_store.cpp b/src/attribute_store.cpp index 3d9ad4e6..363d167b 100644 --- a/src/attribute_store.cpp +++ b/src/attribute_store.cpp @@ -97,6 +97,13 @@ const AttributePair& AttributePairStore::getPairUnsafe(uint32_t i) const { return pairs[shard][offset]; }; +// Remember recently queried/added pairs so that we can return them in the +// future without taking a lock. +thread_local uint64_t tlsPairLookups = 0; +thread_local uint64_t tlsPairLookupsUncached = 0; + +thread_local std::vector cachedAttributePairPointers(64); +thread_local std::vector cachedAttributePairIndexes(64); uint32_t AttributePairStore::addPair(AttributePair& pair, bool isHot) { if (isHot) { { @@ -132,6 +139,23 @@ uint32_t AttributePairStore::addPair(AttributePair& pair, bool isHot) { // Throw it on the pile with the rest of the pairs. size_t hash = pair.hash(); + const size_t candidateIndex = hash % cachedAttributePairPointers.size(); + // Before taking a lock, see if we've seen this attribute pair recently. + + tlsPairLookups++; + if (tlsPairLookups % 1024 == 0) { + lookups += 1024; + } + + + { + const AttributePair* candidate = cachedAttributePairPointers[candidateIndex]; + + if (candidate != nullptr && *candidate == pair) + return cachedAttributePairIndexes[candidateIndex]; + } + + size_t shard = hash % ATTRIBUTE_SHARDS; // Shard 0 is for hot pairs -- pick another shard if it gets selected. 
if (shard == 0) shard = (hash >> 8) % ATTRIBUTE_SHARDS; @@ -140,9 +164,19 @@ uint32_t AttributePairStore::addPair(AttributePair& pair, bool isHot) { if (shard == 0) shard = 1; std::lock_guard lock(pairsMutex[shard]); + + tlsPairLookupsUncached++; + if (tlsPairLookupsUncached % 1024 == 0) + lookupsUncached += 1024; + const auto& index = pairs[shard].find(pair); - if (index != -1) - return (shard << (32 - SHARD_BITS)) + index; + if (index != -1) { + const uint32_t rv = (shard << (32 - SHARD_BITS)) + index; + cachedAttributePairPointers[candidateIndex] = &pairs[shard][index]; + cachedAttributePairIndexes[candidateIndex] = rv; + + return rv; + } pair.ensureStringIsOwned(); uint32_t offset = pairs[shard].add(pair); @@ -265,8 +299,8 @@ void AttributeSet::finalize() { thread_local std::vector cachedAttributeSetPointers(64); thread_local std::vector cachedAttributeSetIndexes(64); -thread_local uint64_t tlsLookups = 0; -thread_local uint64_t tlsLookupsUncached = 0; +thread_local uint64_t tlsSetLookups = 0; +thread_local uint64_t tlsSetLookupsUncached = 0; AttributeIndex AttributeStore::add(AttributeSet &attributes) { // TODO: there's probably a way to use C++ types to distinguish a finalized // and non-finalized AttributeSet, which would make this safer. @@ -277,8 +311,8 @@ AttributeIndex AttributeStore::add(AttributeSet &attributes) { const size_t candidateIndex = hash % cachedAttributeSetPointers.size(); // Before taking a lock, see if we've seen this attribute set recently. 
- tlsLookups++; - if (tlsLookups % 1024 == 0) { + tlsSetLookups++; + if (tlsSetLookups % 1024 == 0) { lookups += 1024; } @@ -296,8 +330,8 @@ AttributeIndex AttributeStore::add(AttributeSet &attributes) { shard = shard >> 2; std::lock_guard lock(setsMutex[shard]); - tlsLookupsUncached++; - if (tlsLookupsUncached % 1024 == 0) + tlsSetLookupsUncached++; + if (tlsSetLookupsUncached % 1024 == 0) lookupsUncached += 1024; const uint32_t offset = sets[shard].add(attributes); @@ -343,7 +377,7 @@ size_t AttributeStore::size() const { } void AttributeStore::reportSize() const { - std::cout << "Attributes: " << size() << " sets from " << lookups.load() << " objects (" << lookupsUncached.load() << " uncached)" << std::endl; + std::cout << "Attributes: " << size() << " sets from " << lookups.load() << " objects (" << lookupsUncached.load() << " uncached), " << pairStore.lookups.load() << " pairs (" << pairStore.lookupsUncached.load() << " uncached)" << std::endl; // Print detailed histogram of frequencies of attributes. 
if (false) { @@ -409,6 +443,9 @@ void AttributeStore::reset() { for (int i = 0; i < cachedAttributeSetPointers.size(); i++) cachedAttributeSetPointers[i] = nullptr; + + for (int i = 0; i < cachedAttributePairPointers.size(); i++) + cachedAttributePairPointers[i] = nullptr; } void AttributeStore::finalize() { From 6ba38b056db63881cf7d99b327898113a0297973 Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Mon, 25 Dec 2023 17:22:24 -0500 Subject: [PATCH 7/7] buffer objects when object index contended --- include/tile_data.h | 4 ++++ src/tile_data.cpp | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/include/tile_data.h b/include/tile_data.h index 6b59ee3f..b78463e2 100644 --- a/include/tile_data.h +++ b/include/tile_data.h @@ -364,6 +364,8 @@ class TileDataSource { ClipCache multiPolygonClipCache; ClipCache multiLinestringClipCache; + std::deque>> pendingSmallIndexObjects; + public: TileDataSource(size_t threadNum, unsigned int baseZoom, bool includeID); @@ -391,6 +393,8 @@ class TileDataSource { ); void addObjectToSmallIndex(const TileCoordinates& index, const OutputObject& oo, uint64_t id); + void addObjectToSmallIndex(const TileCoordinates& index, const OutputObject& oo, uint64_t id, bool needsLock); + void addObjectToSmallIndexUnsafe(const TileCoordinates& index, const OutputObject& oo, uint64_t id); void addObjectToLargeIndex(const Box& envelope, const OutputObject& oo, uint64_t id) { std::lock_guard lock(mutex); diff --git a/src/tile_data.cpp b/src/tile_data.cpp index f78bbdda..407f534a 100644 --- a/src/tile_data.cpp +++ b/src/tile_data.cpp @@ -73,10 +73,21 @@ TileDataSource::TileDataSource(size_t threadNum, unsigned int baseZoom, bool inc } } +thread_local std::vector>* tlsPendingSmallIndexObjects = nullptr; + void TileDataSource::finalize(size_t threadNum) { + uint64_t finalized = 0; + for (const auto& vec : pendingSmallIndexObjects) { + for (const auto& tuple : vec) { + finalized++; + 
addObjectToSmallIndexUnsafe(std::get<0>(tuple), std::get<1>(tuple), std::get<2>(tuple)); + } + } + + std::cout << "indexed " << finalized << " contended objects" << std::endl; + finalizeObjects(name(), threadNum, baseZoom, objects.begin(), objects.end(), lowZoomObjects); finalizeObjects(name(), threadNum, baseZoom, objectsWithIds.begin(), objectsWithIds.end(), lowZoomObjectsWithIds); - } void TileDataSource::addObjectToSmallIndex(const TileCoordinates& index, const OutputObject& oo, uint64_t id) { @@ -90,8 +101,28 @@ void TileDataSource::addObjectToSmallIndex(const TileCoordinates& index, const O } const size_t z6index = z6x * CLUSTER_ZOOM_WIDTH + z6y; + auto& mutex = objectsMutex[z6index % objectsMutex.size()]; + + if (mutex.try_lock()) { + addObjectToSmallIndexUnsafe(index, oo, id); + mutex.unlock(); + } else { + // add to tlsPendingSmallIndexObjects + if (tlsPendingSmallIndexObjects == nullptr) { + std::lock_guard lock(objectsMutex[0]); + pendingSmallIndexObjects.push_back(std::vector>()); + tlsPendingSmallIndexObjects = &pendingSmallIndexObjects.back(); + } - std::lock_guard lock(objectsMutex[z6index % objectsMutex.size()]); + tlsPendingSmallIndexObjects->push_back(std::make_tuple(index, oo, id)); + } +} + +void TileDataSource::addObjectToSmallIndexUnsafe(const TileCoordinates& index, const OutputObject& oo, uint64_t id) { + // Pick the z6 index + const size_t z6x = index.x / z6OffsetDivisor; + const size_t z6y = index.y / z6OffsetDivisor; + const size_t z6index = z6x * CLUSTER_ZOOM_WIDTH + z6y; if (id == 0 || !includeID) objects[z6index].push_back({