Skip to content

Commit

Permalink
Merge pull request #10 from opensource-spraakherkenning-nl/main
Browse files: browse the repository at this point in the history
Fixed a bunch of bugs and connected functionality to interface
  • Loading branch information
greenw0lf authored Oct 5, 2023
2 parents b0bd580 + 1a4745d commit 1d96aae
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
uses: actions/checkout@v2

- name: Build and push Docker images
if: github.ref == 'refs/heads/master'
if: github.ref == 'refs/heads/main'
uses: docker/build-push-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
Expand Down
8 changes: 6 additions & 2 deletions ASR_NL_benchmark/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
metavar=('reffile_name', 'extension'),
default=['ASR_NL_benchmark/data/test_ref.stm', 'stm'],
help='help: path to the reference file and its extension')
parser.add_argument('-kind', '--kind',
metavar=('speechrecognizer'),
default='',
help='help: enter the name of your speech recognizer')
parser.add_argument('-interactive',
metavar='value',
default='',
Expand All @@ -25,7 +29,7 @@
interface.main()
else:
print('Running benchmarking')
benchmarking = pipeline.Pipeline(args.hypfile[0], args.hypfile[1], args.reffile[0], args.reffile[1])
benchmarking = pipeline.Pipeline(args.hypfile[0], args.hypfile[1], args.reffile[0], args.reffile[1], kind=args.kind)
benchmarking.main()
pipeline.process_results()
pipeline.process_results(kind=args.kind)

11 changes: 6 additions & 5 deletions ASR_NL_benchmark/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ def upload_page():
if request.method == 'POST':
hyp = os.path.join(os.path.sep,'input',request.form.get('hyp'))
ref = os.path.join(os.path.sep,'input',request.form.get('ref'))
kind = request.form.get('kind')
global benchmarking
benchmarking = pipeline.Pipeline(hyp, 'ctm', ref, 'stm')
benchmarking = pipeline.Pipeline(hyp, 'ctm', ref, 'stm', kind)
Thread(target=benchmarking.main).start()
return redirect(f'/progress?ref={ref}&hyp={hyp}')
return render_template('select_files.html')
Expand Down Expand Up @@ -62,12 +63,12 @@ def get_dfs():
dfs[index]['cat']['df'] = pandas.read_csv(folder)
dfs[index]['cat']['kind'] = dfs[index]['cat']['df']['kind'].iloc[0]
dfs[index]['cat']['df'] = dfs[index]['cat']['df'].drop('kind',1)
dfs[index]['cat']['df']['product'] = dfs[index]['cat']['df']['Weighted_wer'] * dfs[index]['cat']['df']['ref_words']
dfs[index]['cat']['df']['product'] = dfs[index]['cat']['df']['WER'] * dfs[index]['cat']['df']['ref_words']
dfs[index]['cat']['wer'] = dfs[index]['cat']['df']['product'].sum() / dfs[index]['cat']['df']['ref_words'].sum()
dfs[index]['cat']['df'] = dfs[index]['cat']['df'].drop('product',1)
print(dfs)

speaker_folders = [f.path for f in os.scandir(os.path.join(os.path.sep,'input','')) if
speaker_folders = [f.path for f in os.scandir(os.path.join(os.path.sep,'input','results','')) if
f.is_file() and f.name.startswith('results_speaker') and f.name.endswith('.csv')]

for folder in speaker_folders:
Expand All @@ -77,16 +78,16 @@ def get_dfs():
except KeyError:
dfs[index] = {}
dfs[index]['spk']= {}
dfs[index]['spk'] = {}
dfs[index]['spk']['agregation'] = 'Per spreker'
dfs[index]['spk']['df'] = pandas.read_csv(folder)
dfs[index]['spk']['kind'] = dfs[index]['spk']['df']['kind'].iloc[0]
dfs[index]['spk']['df'] = dfs[index]['spk']['df'].drop('kind', 1)

dfs[index]['spk']['df']['product'] = dfs[index]['spk']['df']['Weighted_wer'] * dfs[index]['spk']['df'][
dfs[index]['spk']['df']['product'] = dfs[index]['spk']['df']['WER'] * dfs[index]['spk']['df'][
'ref_words']
dfs[index]['spk']['wer'] = dfs[index]['spk']['df']['product'].sum() / dfs[index]['spk']['df']['ref_words'].sum()
dfs[index]['spk']['df'] = dfs[index]['spk']['df'].drop('product', 1)
print(dfs)

return dfs

Expand Down
19 changes: 12 additions & 7 deletions ASR_NL_benchmark/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def calculate_wer(df):
wer = float(df['product'].sum()) / float(df['ref_words'].sum())
return df, wer

def calculate_wer_per_cat(df,category='category', id='', kind=False):
def calculate_wer_per_cat(df,category='category', id='', kind=''):
""" Calculates the WER for every unique value for a certain column
Args:
df: the pandas dataframe
Expand All @@ -98,11 +98,11 @@ def calculate_wer_per_cat(df,category='category', id='', kind=False):
1 banaan 2 0.40 False
"""
df_out = df.groupby('category', as_index=False).agg({'ref_words': 'sum', 'product': 'sum'})
df_out = df.groupby(category, as_index=False).agg({'ref_words': 'sum', 'product': 'sum'})
df_out['WER'] = (df_out['product'] / df_out['ref_words']).round(2)
df_out = df_out.drop('product', 1)
df_out['kind'] = kind
df_out.to_csv(os.path.join(os.path.sep, 'input', 'results', f'results_{category}_{id}.csv'), index=False)
df_out.to_csv(os.path.join(os.path.sep, 'input', 'results', f'results_{category}_{id}_{kind}.csv'), index=False)
return df_out

def process_results_dtl_only(path_parts=('input','results'), id='', kind= False):
Expand Down Expand Up @@ -210,14 +210,19 @@ def process_input(hypfile_arg, reffile_arg):


class Pipeline():
def __init__(self, hypfile_input_path, hypextension, reffile_input_path, refextension):
def __init__(self, hypfile_input_path, hypextension, reffile_input_path, refextension, kind):
self.progress = 0
self.failed = 0
self.hypfile_input_path = hypfile_input_path
self.reffile_input_path = reffile_input_path
self.hypfile_input_path = os.path.join(os.path.sep,'input',hypfile_input_path)
self.reffile_input_path = os.path.join(os.path.sep,'input',reffile_input_path)
self.hypextension = hypextension
self.refextension = refextension
self.kind = kind
self.logging = set_logging(logpath=os.path.join(os.path.sep,'input',f'{date.today()}_logging.log'))
self.logging.info(f"hypfile path from terminal: {hypfile_input_path}")
self.logging.info(f"reffile path from terminal: {reffile_input_path}")
self.logging.info(f"Pipeline class' hypfile path: {self.hypfile_input_path}")
self.logging.info(f"Pipeline class' reffile path: {self.reffile_input_path}")

def main(self):
hyp_list, ref_list = process_input(self.hypfile_input_path, self.reffile_input_path)
Expand All @@ -238,7 +243,7 @@ def main(self):
done +=1
self.progress = done/total
self.failed += 1
process_results(path_parts=('input','results'), kind=False)
process_results(path_parts=('input','results'), kind=self.kind)



Expand Down
5 changes: 4 additions & 1 deletion ASR_NL_benchmark/templates/select_files.html
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ <h1> Select Hypothese and Reference files or folders </h1>
<div class="container pt-3 m-3" width="80%">
<div class="form-group">
<form method="POST">
<label>Name of speech recognizer</label>
<input type="text" class="form-control" id="kind" name="kind" placeholder="Name of speech recognizer">
<label>Path to hypothesis file or folder</label>
<input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
<label>Path to reference file or folder</label>
Expand All @@ -38,7 +40,8 @@ <h1> Select Hypothese and Reference files or folders </h1>




<!-- Option 1: Bootstrap Bundle with Popper -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta2/dist/js/bootstrap.bundle.min.js" integrity="sha384-b5kHyXgcpbZJO/tY9Ul7kGkf1S0CWuKcCD38l8YkeH8z8QjE0GmW1gYU5S9FOnJ0" crossorigin="anonymous"></script>
</body>
</html>
</html>

0 comments on commit 1d96aae

Please sign in to comment.