Skip to content

Commit

Permalink
Nadrop (dotnet#1810)
Browse files (browse the repository at this point in the history)
* Fix MissingValueDroppingTransformer bug

* Add unit test

* Add baseline
  • Loading branch information
yaeldMS authored Dec 4, 2018
1 parent a06a0b7 commit 8a9b016
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ protected override Delegate MakeGetter(IRow input, int iinfo, Func<int, bool> ac

private ValueGetter<VBuffer<TDst>> MakeVecGetter<TDst>(IRow input, int iinfo)
{
var srcGetter = input.GetGetter<VBuffer<TDst>>(iinfo);
var srcGetter = input.GetGetter<VBuffer<TDst>>(_srcCols[iinfo]);
var buffer = default(VBuffer<TDst>);
var isNA = (InPredicate<TDst>)_isNAs[iinfo];
var def = default(TDst);
Expand Down
17 changes: 17 additions & 0 deletions test/BaselineOutput/Common/SavePipe/SavePipeDropNAs-Data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#@ TextLoader{
#@ header+
#@ sep=tab
#@ col=Num:R4:0-1
#@ col=Sep:TX:2
#@ col=NumNAsDropped:R4:3-**
#@ col={name=Sep2 type=TX src={ min=-1}}
#@ col={name=Text type=TX src={ min=-1 var=+}}
#@ col={name=Sep3 type=TX src={ min=-1}}
#@ col={name=TextNAsDropped type=U4 src={ min=-1 var=+} key=0-3}
#@ }
"" "" Sep Sep2 Sep3
2 0 | 2 0 | Hello World! | 0
3 4 | 3 4 | |
0 ? | 0 | Bye all | 3 1
7 8 | 7 8 | Good bye | 2
? ? | | this is a |
29 changes: 29 additions & 0 deletions test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,35 @@ public void SavePipeTokenizerAndStopWords()
Done();
}

/// <summary>
/// Writes a small comma-separated fixture file and runs it through a pipeline that
/// applies NADrop to a numeric vector column and to a term-mapped text column.
/// </summary>
[Fact]
public void SavePipeDropNAs()
{
    string dataFile = DeleteOutputPath("SavePipe", "DropNAs.txt");

    // Each row: two numeric fields, a "|" separator field, and a free-text field.
    // Some numeric fields are "nan" or "?", and one text field is empty.
    string[] inputRows =
    {
        "2,0,|,Hello World!",
        "3,4,|,",
        "0,nan,|,Bye all",
        "7,8,|,Good bye",
        "?,nan,|,this is a"
    };
    File.WriteAllLines(dataFile, inputRows);

    // Pipeline arguments, in order: load the text file, drop NAs from the numeric
    // vector, tokenize and term-map the text, drop NAs from the resulting column,
    // duplicate the separator column, then select the columns to keep.
    string[] pipelineArgs =
    {
        "loader=Text{header- sep=, col=Num:R4:0-1 col=Sep:TX:2 col=Text:TX:3}",
        "xf=NADrop{col=NumNAsDropped:Num}",
        "xf=Token{col=Text}",
        "xf=Term{col=Text2:Text terms=Hello,all,Good,Bye}",
        "xf=NADrop{col=TextNAsDropped:Text2}",
        "xf=Copy{col=Sep2:Sep col=Sep3:Sep}",
        "xf=Select{keepcol=Num keepcol=Sep keepcol=NumNAsDropped keepcol=Sep2 keepcol=Text keepcol=Sep3 keepcol=TextNAsDropped}"
    };

    // NOTE(review): TestCore presumably checks the saved output against a stored baseline - confirm.
    TestCore(dataFile, false, pipelineArgs, baselineSchema: false, roundTripText: false);

    Done();
}

[Fact]
public void TestHashTransformFloat()
{
Expand Down

0 comments on commit 8a9b016

Please sign in to comment.