From d7c613d33a5e82df300fd8b368e6e46a371e85c4 Mon Sep 17 00:00:00 2001 From: Emanuele Danovaro Date: Wed, 1 Nov 2023 16:07:42 +0000 Subject: [PATCH 1/6] FDB-320 - added compact representation of available fields --- src/fdb5/tools/fdb-list.cc | 73 ++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 7 deletions(-) diff --git a/src/fdb5/tools/fdb-list.cc b/src/fdb5/tools/fdb-list.cc index de4271646..259d0a777 100644 --- a/src/fdb5/tools/fdb-list.cc +++ b/src/fdb5/tools/fdb-list.cc @@ -17,6 +17,8 @@ #include "eckit/option/CmdArgs.h" #include "eckit/log/JSON.h" +#include "metkit/hypercube/HyperCube.h" + #include "fdb5/api/FDB.h" #include "fdb5/api/helpers/FDBToolRequest.h" #include "fdb5/database/DB.h" @@ -47,6 +49,7 @@ class FDBList : public FDBVisitTool { options_.push_back(new SimpleOption("full", "Include all entries (including masked duplicates)")); options_.push_back(new SimpleOption("porcelain", "Streamlined and stable output for input into other tools")); options_.push_back(new SimpleOption("json", "Output available fields in JSON form")); + options_.push_back(new SimpleOption("compact", "Aggregate available fields in MARS requests")); } private: // methods @@ -58,6 +61,7 @@ class FDBList : public FDBVisitTool { bool full_; bool porcelain_; bool json_; + bool compact_; }; void FDBList::init(const CmdArgs& args) { @@ -68,17 +72,40 @@ void FDBList::init(const CmdArgs& args) { full_ = args.getBool("full", false); porcelain_ = args.getBool("porcelain", false); json_ = args.getBool("json", false); + compact_ = args.getBool("compact", false); if (json_) { porcelain_ = true; if (location_) { - throw UserError("--json and --location not compatible", Here()); + throw UserError("--json and --location are not compatible", Here()); } } + if (compact_) { + if (location_) { + throw UserError("--compact and --location are not compatible", Here()); + } + if (full_) { + throw UserError("--compact and --full are not compatible", Here()); + } + if (porcelain_) { + throw UserError("--compact and --porcelain are not compatible", Here()); + } + } + /// @todo option ignore-errors } +std::string keySignature(const fdb5::Key& key) { + std::string signature; + std::string separator=""; + for (auto k : key.keys()) { + signature += separator+k; + separator=":"; + } + return signature; +} + void FDBList::execute(const CmdArgs& args) { FDB fdb(config(args)); @@ -98,18 +125,50 @@ void FDBList::execute(const CmdArgs& args) { } // If --full is supplied, then include all entries including duplicates. - auto listObject = fdb.list(request, !full_); + auto listObject = fdb.list(request, !full_ && !compact_); + std::unordered_set seenKeys; + std::map requests; size_t count = 0; ListElement elem; while (listObject.next(elem)) { - if (json_) { - (*json) << elem; + if (compact_) { + fdb5::Key combined = elem.combinedKey(); + std::string axes = keySignature(combined); + auto it = requests.find(axes); + if (it == requests.end()) { + requests.emplace(axes, combined.request()); + } else { + it->second.merge(combined.request()); + } + seenKeys.emplace(combined); } else { - elem.print(Log::info(), location_, !porcelain_); - Log::info() << std::endl; - count++; + if (json_) { + (*json) << elem; + } else { + elem.print(Log::info(), location_, !porcelain_); + Log::info() << std::endl; + count++; + } + } + } + if (compact_) { + std::map hypercubes; + + for (auto r: requests) { + hypercubes.emplace(r.first, new metkit::hypercube::HyperCube(r.second)); + } + for (auto k: seenKeys) { + auto it = hypercubes.find(keySignature(k)); + ASSERT(it != hypercubes.end()); + it->second->clear(k.request()); + } + for (auto h: hypercubes) { + for (auto r: h.second->requests()) { + r.dump(Log::info(), "", ""); + Log::info() << std::endl; + } } } From 317d929f1b7ff365d1bbdadcf93e18485a3b0db1 Mon Sep 17 00:00:00 2001 From: Emanuele Danovaro Date: Fri, 17 Nov 2023 12:56:49 +0000 Subject: [PATCH 2/6] FDB-320 simplified data aggregation (only at dense-set level) --- src/fdb5/tools/fdb-list.cc | 51 ++++++++++++++------------------------ 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/src/fdb5/tools/fdb-list.cc b/src/fdb5/tools/fdb-list.cc index 259d0a777..748ad0287 100644 --- a/src/fdb5/tools/fdb-list.cc +++ b/src/fdb5/tools/fdb-list.cc @@ -96,16 +96,6 @@ void FDBList::init(const CmdArgs& args) { /// @todo option ignore-errors } -std::string keySignature(const fdb5::Key& key) { - std::string signature; - std::string separator=""; - for (auto k : key.keys()) { - signature += separator+k; - separator=":"; - } - return signature; -} - void FDBList::execute(const CmdArgs& args) { FDB fdb(config(args)); @@ -126,46 +116,43 @@ void FDBList::execute(const CmdArgs& args) { // If --full is supplied, then include all entries including duplicates. auto listObject = fdb.list(request, !full_ && !compact_); - std::unordered_set seenKeys; - std::map requests; + std::map>> requests; - size_t count = 0; ListElement elem; while (listObject.next(elem)) { if (compact_) { - fdb5::Key combined = elem.combinedKey(); - std::string axes = keySignature(combined); - auto it = requests.find(axes); + std::vector keys = elem.key(); + ASSERT(keys.size() == 3); + + std::string treeAxes = keys[0]; + treeAxes += ","; + treeAxes += keys[1]; + + auto it = requests.find(treeAxes); if (it == requests.end()) { - requests.emplace(axes, combined.request()); + requests.emplace(treeAxes, std::make_pair(keys[2].request(), std::unordered_set{keys[2]})); } else { - it->second.merge(combined.request()); + it->second.first.merge(keys[2].request()); + it->second.second.emplace(keys[2]); } - seenKeys.emplace(combined); } else { if (json_) { (*json) << elem; } else { elem.print(Log::info(), location_, !porcelain_); Log::info() << std::endl; - count++; } } } if (compact_) { - std::map hypercubes; - - for (auto r: requests) { - hypercubes.emplace(r.first, new metkit::hypercube::HyperCube(r.second)); - } - for (auto k: seenKeys) { - auto it = hypercubes.find(keySignature(k)); - ASSERT(it != hypercubes.end()); - it->second->clear(k.request()); - } - for (auto h: hypercubes) { - for (auto r: h.second->requests()) { + for (const auto& tree: requests) { + metkit::hypercube::HyperCube h{tree.second.first}; + for (const auto& k: tree.second.second) { + h.clear(k.request()); + } + for (const auto& r: h.requests()) { + Log::info() << tree.first << ","; r.dump(Log::info(), "", ""); Log::info() << std::endl; } From 71468845d8137a31c0c4a3277d6977833e22cb54 Mon Sep 17 00:00:00 2001 From: Emanuele Danovaro Date: Fri, 17 Nov 2023 13:19:00 +0000 Subject: [PATCH 3/6] FDB-320 fix output --- src/fdb5/tools/fdb-list.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fdb5/tools/fdb-list.cc b/src/fdb5/tools/fdb-list.cc index 748ad0287..af8ab7fb3 100644 --- a/src/fdb5/tools/fdb-list.cc +++ b/src/fdb5/tools/fdb-list.cc @@ -152,8 +152,8 @@ void FDBList::execute(const CmdArgs& args) { h.clear(k.request()); } for (const auto& r: h.requests()) { - Log::info() << tree.first << ","; - r.dump(Log::info(), "", ""); + Log::info() << "retrieve," << tree.first << ","; + r.dump(Log::info(), "", "", false); Log::info() << std::endl; } } From 27a9c3e484ce0700d9556d5a351ca2703a30e699 Mon Sep 17 00:00:00 2001 From: Emanuele Danovaro Date: Fri, 17 Nov 2023 14:18:34 +0000 Subject: [PATCH 4/6] FDB-320 optimised request in case of full hypercube --- src/fdb5/tools/fdb-list.cc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/fdb5/tools/fdb-list.cc b/src/fdb5/tools/fdb-list.cc index af8ab7fb3..7f5864687 100644 --- a/src/fdb5/tools/fdb-list.cc +++ b/src/fdb5/tools/fdb-list.cc @@ -148,13 +148,19 @@ void FDBList::execute(const CmdArgs& args) { if (compact_) { for (const auto& tree: requests) { metkit::hypercube::HyperCube h{tree.second.first}; - for (const auto& k: tree.second.second) { - h.clear(k.request()); - } - for (const auto& r: h.requests()) { + if (h.size() == tree.second.second.size()) { Log::info() << "retrieve," << tree.first << ","; - r.dump(Log::info(), "", "", false); + tree.second.first.dump(Log::info(), "", "", false); Log::info() << std::endl; + } else { + for (const auto& k: tree.second.second) { + h.clear(k.request()); + } + for (const auto& r: h.requests()) { + Log::info() << "retrieve," << tree.first << ","; + r.dump(Log::info(), "", "", false); + Log::info() << std::endl; + } } } } From 277deac7a183964ff6698a3520236fe400a07ccd Mon Sep 17 00:00:00 2001 From: Emanuele Danovaro Date: Fri, 17 Nov 2023 14:47:42 +0000 Subject: [PATCH 5/6] FDB-320 checking leaf structure (to handle optional metadata) --- src/fdb5/tools/fdb-list.cc | 54 +++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/src/fdb5/tools/fdb-list.cc b/src/fdb5/tools/fdb-list.cc index 7f5864687..25712189b 100644 --- a/src/fdb5/tools/fdb-list.cc +++ b/src/fdb5/tools/fdb-list.cc @@ -64,6 +64,18 @@ class FDBList : public FDBVisitTool { bool compact_; }; + +std::string keySignature(const fdb5::Key& key) { + std::string signature; + std::string separator=""; + for (auto k : key.keys()) { + signature += separator+k; + separator=":"; + } + return signature; +} + + void FDBList::init(const CmdArgs& args) { FDBVisitTool::init(args); @@ -116,7 +128,7 @@ void FDBList::execute(const CmdArgs& args) { // If --full is supplied, then include all entries including duplicates. auto listObject = fdb.list(request, !full_ && !compact_); - std::map>> requests; + std::map>>> requests; ListElement elem; while (listObject.next(elem)) { @@ -129,12 +141,21 @@ void FDBList::execute(const CmdArgs& args) { treeAxes += ","; treeAxes += keys[1]; + std::string signature=keySignature(keys[2]); + auto it = requests.find(treeAxes); if (it == requests.end()) { - requests.emplace(treeAxes, std::make_pair(keys[2].request(), std::unordered_set{keys[2]})); + std::map>> leaves; + leaves.emplace(signature, std::make_pair(keys[2].request(), std::unordered_set{keys[2]})); + requests.emplace(treeAxes, leaves); } else { - it->second.first.merge(keys[2].request()); - it->second.second.emplace(keys[2]); + auto h = it->second.find(signature); + if (h != it->second.end()) { // the hypercube request is already there... adding the 3rd level key + h->second.first.merge(keys[2].request()); + h->second.second.insert(keys[2]); + } else { + it->second.emplace(signature, std::make_pair(keys[2].request(), std::unordered_set{keys[2]})); + } } } else { if (json_) { @@ -147,24 +168,25 @@ void FDBList::execute(const CmdArgs& args) { } if (compact_) { for (const auto& tree: requests) { - metkit::hypercube::HyperCube h{tree.second.first}; - if (h.size() == tree.second.second.size()) { - Log::info() << "retrieve," << tree.first << ","; - tree.second.first.dump(Log::info(), "", "", false); - Log::info() << std::endl; - } else { - for (const auto& k: tree.second.second) { - h.clear(k.request()); - } - for (const auto& r: h.requests()) { + for (const auto& leaf: tree.second) { + metkit::hypercube::HyperCube h{leaf.second.first}; + if (h.size() == leaf.second.second.size()) { Log::info() << "retrieve," << tree.first << ","; - r.dump(Log::info(), "", "", false); + leaf.second.first.dump(Log::info(), "", "", false); Log::info() << std::endl; + } else { + for (const auto& k: leaf.second.second) { + h.clear(k.request()); + } + for (const auto& r: h.requests()) { + Log::info() << "retrieve," << tree.first << ","; + r.dump(Log::info(), "", "", false); + Log::info() << std::endl; + } } } } } - // n.b. finding no data is not an error for fdb-list } From 06413729188f057f04b203b9c5c36114a85c53c5 Mon Sep 17 00:00:00 2001 From: Emanuele Danovaro Date: Fri, 9 Feb 2024 16:44:19 +0100 Subject: [PATCH 6/6] version bump --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 30e5569b9..7a979be0b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.11.29 +5.11.30