generated from adrienaury/go-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* feat(scan): alias and filters * test(scan): fix venom tests * fix: self reference optimization * feat(scan): rename only to include * feat(dump): wip! include only fields * feat(dump): wip! add include flag * feat(dump): include flag + detect complete entity * feat(dump): add dump observers * feat(dump): add watch flag * feat(dump): rename incomplete to consistent
- Loading branch information
1 parent
72172c8
commit ba2a17c
Showing
12 changed files
with
413 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,6 +65,34 @@ $ silo scan my-silo < input.jsonl | |
|
||
Analysis data is persisted on disk on the `my-silo` path relative to the current directory. | ||
|
||
#### passthrough stdin to stdout | ||
|
||
Use `--passthrough` (short : `-p`) to pass input to stdout instead of displaying information. | |
|
||
```console | ||
$ silo scan my-silo --passthrough < input.jsonl | ||
{"ID_CLIENT":"0001","EMAIL_CLIENT":"[email protected]","ACCOUNT_NUMBER":null} | ||
{"ID_CLIENT":null,"EMAIL_CLIENT":null,"ACCOUNT_NUMBER":"C01"} | ||
``` | ||
|
||
#### include only specific fields/columns | ||
|
||
Use `--include <fieldname>` (short : `-i <fieldname>`, repeatable) to select only given columns to scan. | ||
|
||
```console | ||
$ silo scan my-silo --include ID_CLIENT --include EMAIL_CLIENT < input.jsonl | ||
⣾ Scanned 5 rows, found 15 links (4084 row/s) [0s] | ||
``` | ||
|
||
#### rename fields/columns on the fly | ||
|
||
Use `--alias <fieldname>=<alias>` (short : `-a <fieldname>=<alias>`, repeatable) to rename fields before storing links. | ||
|
||
```console | ||
$ silo scan my-silo --alias ID_CLIENT=CLIENT --alias EMAIL_CLIENT=EMAIL < input.jsonl | ||
⣾ Scanned 5 rows, found 15 links (4084 row/s) [0s] | ||
``` | ||
|
||
### silo dump | ||
|
||
The silo dump command is used to dump each connected entity into a file. This allows users to create a referential of all entities discovered within the JSONLine data. Here's how to use it: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
// Copyright (C) 2024 CGI France | ||
// | ||
// This file is part of SILO. | ||
// | ||
// SILO is free software: you can redistribute it and/or modify | ||
// it under the terms of the GNU General Public License as published by | ||
// the Free Software Foundation, either version 3 of the License, or | ||
// (at your option) any later version. | ||
// | ||
// SILO is distributed in the hope that it will be useful, | ||
// but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
// GNU General Public License for more details. | ||
// | ||
// You should have received a copy of the GNU General Public License | ||
// along with SILO. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
package infra | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
"time" | ||
|
||
"github.com/cgi-fr/silo/pkg/silo" | ||
"github.com/schollz/progressbar/v3" | ||
) | ||
|
||
type DumpObserver struct { | ||
countTotal int | ||
countComplete int | ||
countConsistent int | ||
countInconsistent int | ||
countEmpty int | ||
bar *progressbar.ProgressBar | ||
} | ||
|
||
func NewDumpObserver() *DumpObserver { | ||
//nolint:gomnd | ||
pgb := progressbar.NewOptions(-1, | ||
progressbar.OptionSetDescription("Dumping ... "), | ||
progressbar.OptionSetItsString("entity"), | ||
progressbar.OptionSetWriter(os.Stderr), | ||
progressbar.OptionShowIts(), | ||
progressbar.OptionSpinnerType(11), | ||
progressbar.OptionThrottle(time.Millisecond*10), | ||
progressbar.OptionOnCompletion(func() { fmt.Fprintln(os.Stderr) }), | ||
// progressbar.OptionShowDescriptionAtLineEnd(), | ||
) | ||
|
||
return &DumpObserver{ | ||
countTotal: 0, | ||
countComplete: 0, | ||
countConsistent: 0, | ||
countInconsistent: 0, | ||
countEmpty: 0, | ||
bar: pgb, | ||
} | ||
} | ||
|
||
func (o *DumpObserver) Entity(status silo.Status, _ map[string]int) { | ||
o.countTotal++ | ||
|
||
switch status { | ||
case silo.StatusEntityComplete: | ||
o.countComplete++ | ||
case silo.StatusEntityConsistent: | ||
o.countConsistent++ | ||
case silo.StatusEntityInconsistent: | ||
o.countInconsistent++ | ||
case silo.StatusEntityEmpty: | ||
o.countEmpty++ | ||
} | ||
|
||
_ = o.bar.Add(1) | ||
|
||
o.bar.Describe(fmt.Sprintf("Dumped %d entities / complete=%d / consistent=%d / inconsistent=%d / empty=%d", | ||
o.countTotal, | ||
o.countComplete, | ||
o.countConsistent, | ||
o.countInconsistent, | ||
o.countEmpty)) | ||
} | ||
|
||
func (o *DumpObserver) Close() { | ||
_ = o.bar.Close() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Copyright (C) 2024 CGI France | ||
// | ||
// This file is part of SILO. | ||
// | ||
// SILO is free software: you can redistribute it and/or modify | ||
// it under the terms of the GNU General Public License as published by | ||
// the Free Software Foundation, either version 3 of the License, or | ||
// (at your option) any later version. | ||
// | ||
// SILO is distributed in the hope that it will be useful, | ||
// but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
// GNU General Public License for more details. | ||
// | ||
// You should have received a copy of the GNU General Public License | ||
// along with SILO. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
package silo | ||
|
||
import (
	"errors"
	"sort"
)
|
||
// config holds the include and alias settings checked by validate.
type config struct {
	// include is the set of included names; validate looks alias keys up in it.
	// An empty set disables that check.
	include map[string]bool

	// includeList is a slice of names.
	// NOTE(review): presumably the included names in the order given - confirm.
	includeList []string

	// aliases maps a name to its alias; validate checks its keys against include.
	// NOTE(review): presumably field name -> alias - confirm against callers.
	aliases map[string]string
}
|
||
func newConfig() *config { | ||
config := config{ | ||
include: map[string]bool{}, | ||
includeList: []string{}, | ||
aliases: map[string]string{}, | ||
} | ||
|
||
return &config | ||
} | ||
|
||
func (cfg *config) validate() error { | ||
var errs []error | ||
|
||
for key := range cfg.aliases { | ||
if _, ok := cfg.include[key]; !ok && len(cfg.include) > 0 { | ||
errs = append(errs, &ConfigScanAliasIsNotIncludedError{alias: key}) | ||
} | ||
} | ||
|
||
if len(errs) != 0 { | ||
return errors.Join(errs...) | ||
} | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.