Merge pull request #10 from opensource-spraakherkenning-nl/main

Fixed a bunch of bugs and connected functionality to interface
opensource-spraakherkenning-nl · Oct 5, 2023 · 1d96aae · 1d96aae
2 parents b0bd580 + 1a4745d
commit 1d96aae
Show file tree

Hide file tree

Showing 5 changed files with 29 additions and 16 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -22,7 +22,7 @@ jobs:
         uses: actions/checkout@v2
 
       - name: Build and push Docker images
-        if: github.ref == 'refs/heads/master'
+        if: github.ref == 'refs/heads/main'
         uses: docker/build-push-action@v1
         with:
           username: ${{ secrets.DOCKER_USERNAME }}

diff --git a/ASR_NL_benchmark/__main__.py b/ASR_NL_benchmark/__main__.py
@@ -13,6 +13,10 @@
                         metavar=('reffile_name', 'extension'),
                         default=['ASR_NL_benchmark/data/test_ref.stm', 'stm'],
                         help='help: path to the reference file and its extension')
+    parser.add_argument('-kind', '--kind',
+                        metavar=('speechrecognizer'),
+                        default='',
+                        help='help: enter the name of your speech recognizer')
     parser.add_argument('-interactive',
                         metavar='value',
                         default='',
@@ -25,7 +29,7 @@
         interface.main()
     else:
         print('Running benchmarking')
-        benchmarking = pipeline.Pipeline(args.hypfile[0], args.hypfile[1], args.reffile[0], args.reffile[1])
+        benchmarking = pipeline.Pipeline(args.hypfile[0], args.hypfile[1], args.reffile[0], args.reffile[1], kind=args.kind)
         benchmarking.main()
-        pipeline.process_results()
+        pipeline.process_results(kind=args.kind)
 
diff --git a/ASR_NL_benchmark/interface.py b/ASR_NL_benchmark/interface.py
@@ -19,8 +19,9 @@ def upload_page():
     if request.method == 'POST':
         hyp = os.path.join(os.path.sep,'input',request.form.get('hyp'))
         ref = os.path.join(os.path.sep,'input',request.form.get('ref'))
+        kind = request.form.get('kind')
         global benchmarking
-        benchmarking = pipeline.Pipeline(hyp, 'ctm', ref, 'stm')
+        benchmarking = pipeline.Pipeline(hyp, 'ctm', ref, 'stm', kind)
         Thread(target=benchmarking.main).start()
         return redirect(f'/progress?ref={ref}&hyp={hyp}')
     return render_template('select_files.html')
@@ -62,12 +63,12 @@ def get_dfs():
         dfs[index]['cat']['df'] = pandas.read_csv(folder)
         dfs[index]['cat']['kind'] = dfs[index]['cat']['df']['kind'].iloc[0]
         dfs[index]['cat']['df'] = dfs[index]['cat']['df'].drop('kind',1)
-        dfs[index]['cat']['df']['product'] = dfs[index]['cat']['df']['Weighted_wer'] * dfs[index]['cat']['df']['ref_words']
+        dfs[index]['cat']['df']['product'] = dfs[index]['cat']['df']['WER'] * dfs[index]['cat']['df']['ref_words']
         dfs[index]['cat']['wer'] = dfs[index]['cat']['df']['product'].sum() / dfs[index]['cat']['df']['ref_words'].sum()
         dfs[index]['cat']['df'] = dfs[index]['cat']['df'].drop('product',1)
         print(dfs)
 
-    speaker_folders = [f.path for f in os.scandir(os.path.join(os.path.sep,'input','')) if
+    speaker_folders = [f.path for f in os.scandir(os.path.join(os.path.sep,'input','results','')) if
                        f.is_file() and f.name.startswith('results_speaker') and f.name.endswith('.csv')]
 
     for folder in speaker_folders:
@@ -77,16 +78,16 @@ def get_dfs():
         except KeyError:
             dfs[index] = {}
             dfs[index]['spk']= {}
-        dfs[index]['spk'] = {}
         dfs[index]['spk']['agregation'] = 'Per spreker'
         dfs[index]['spk']['df'] = pandas.read_csv(folder)
         dfs[index]['spk']['kind'] = dfs[index]['spk']['df']['kind'].iloc[0]
         dfs[index]['spk']['df'] = dfs[index]['spk']['df'].drop('kind', 1)
 
-        dfs[index]['spk']['df']['product'] = dfs[index]['spk']['df']['Weighted_wer'] * dfs[index]['spk']['df'][
+        dfs[index]['spk']['df']['product'] = dfs[index]['spk']['df']['WER'] * dfs[index]['spk']['df'][
             'ref_words']
         dfs[index]['spk']['wer'] = dfs[index]['spk']['df']['product'].sum() / dfs[index]['spk']['df']['ref_words'].sum()
         dfs[index]['spk']['df'] = dfs[index]['spk']['df'].drop('product', 1)
+        print(dfs)
 
     return dfs
 

diff --git a/ASR_NL_benchmark/pipeline.py b/ASR_NL_benchmark/pipeline.py
@@ -78,7 +78,7 @@ def calculate_wer(df):
     wer = float(df['product'].sum()) / float(df['ref_words'].sum())
     return df, wer
 
-def calculate_wer_per_cat(df,category='category', id='', kind=False):
+def calculate_wer_per_cat(df,category='category', id='', kind=''):
     """ Calculates the WER for every unique value for a certain column
     Args:
         df: the pandas dataframe
@@ -98,11 +98,11 @@ def calculate_wer_per_cat(df,category='category', id='', kind=False):
     1   banaan          2  0.40  False
 
     """
-    df_out = df.groupby('category', as_index=False).agg({'ref_words': 'sum', 'product': 'sum'})
+    df_out = df.groupby(category, as_index=False).agg({'ref_words': 'sum', 'product': 'sum'})
     df_out['WER'] = (df_out['product'] / df_out['ref_words']).round(2)
     df_out = df_out.drop('product', 1)
     df_out['kind'] = kind
-    df_out.to_csv(os.path.join(os.path.sep, 'input', 'results', f'results_{category}_{id}.csv'), index=False)
+    df_out.to_csv(os.path.join(os.path.sep, 'input', 'results', f'results_{category}_{id}_{kind}.csv'), index=False)
     return df_out
 
 def process_results_dtl_only(path_parts=('input','results'), id='', kind= False):
@@ -210,14 +210,19 @@ def process_input(hypfile_arg, reffile_arg):
 
 
 class Pipeline():
-    def __init__(self, hypfile_input_path, hypextension, reffile_input_path, refextension):
+    def __init__(self, hypfile_input_path, hypextension, reffile_input_path, refextension, kind):
         self.progress = 0
         self.failed = 0
-        self.hypfile_input_path = hypfile_input_path
-        self.reffile_input_path = reffile_input_path
+        self.hypfile_input_path = os.path.join(os.path.sep,'input',hypfile_input_path)
+        self.reffile_input_path = os.path.join(os.path.sep,'input',reffile_input_path)
         self.hypextension = hypextension
         self.refextension = refextension
+        self.kind = kind
         self.logging = set_logging(logpath=os.path.join(os.path.sep,'input',f'{date.today()}_logging.log'))
+        self.logging.info(f"hypfile path from terminal: {hypfile_input_path}")
+        self.logging.info(f"reffile path from terminal: {reffile_input_path}")
+        self.logging.info(f"Pipeline class' hypfile path: {self.hypfile_input_path}")
+        self.logging.info(f"Pipeline class' reffile path: {self.reffile_input_path}")
 
     def main(self):
         hyp_list, ref_list = process_input(self.hypfile_input_path, self.reffile_input_path)
@@ -238,7 +243,7 @@ def main(self):
                 done +=1
                 self.progress = done/total
                 self.failed += 1
-        process_results(path_parts=('input','results'), kind=False)
+        process_results(path_parts=('input','results'), kind=self.kind)
 
 
 

diff --git a/ASR_NL_benchmark/templates/select_files.html b/ASR_NL_benchmark/templates/select_files.html
@@ -26,6 +26,8 @@ <h1> Select Hypothese and Reference files or folders </h1>
   <div class="container pt-3 m-3" width="80%">
   <div class="form-group">
       <form method="POST">
+          <label>Name of speech recognizer</label>
+          <input type="text" class="form-control" id="kind" name="kind" placeholder="Name of speech recognizer">
           <label>Path to hypothesis file or folder</label>
           <input type="text" class="form-control" id="hyp" name="hyp" placeholder="Hyp File or folder">
           <label>Path to reference file or folder</label>
@@ -38,7 +40,8 @@ <h1> Select Hypothese and Reference files or folders </h1>
 
 
 
+
     <!-- Option 1: Bootstrap Bundle with Popper -->
     <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta2/dist/js/bootstrap.bundle.min.js" integrity="sha384-b5kHyXgcpbZJO/tY9Ul7kGkf1S0CWuKcCD38l8YkeH8z8QjE0GmW1gYU5S9FOnJ0" crossorigin="anonymous"></script>
   </body>
-</html>
+</html>