V 5 #4 · Open

wants to merge 12 commits into base: main
89 changes: 89 additions & 0 deletions .github/workflows/csv2readme.py
@@ -0,0 +1,89 @@
import os
import csv
import textwrap
import datetime


def update_readme():
    csv_dir = '.github/excel2csv'
    readme_path = 'README.md'

    if not os.path.exists(csv_dir):
        print(f"Directory {csv_dir} does not exist.")
        return

    files = []
    names = []
    emails = []
    comments = []
    line_counts = []

    for filename in os.listdir(csv_dir):
        if filename.endswith('.csv'):
            with open(os.path.join(csv_dir, filename), 'r') as csvfile:
                csvreader = csv.reader(csvfile)
                current_name = ''
                current_email = ''
                current_comment = ''
                line_count = 0
                for i, row in enumerate(csvreader):
                    if i == 0:
                        current_name = row[1].strip()  # Extract Name
                    elif i == 1:
                        current_email = row[1].strip()  # Extract Email
                    elif i == 4:
                        current_comment = row[1].strip()  # Extract Comment
                    # Count data rows, which start on line 8 of the CSV
                    if i >= 7:
                        line_count += 1
                # Append the extracted data for the current file to the lists
                files.append(filename)
                names.append(current_name)
                emails.append(current_email)
                comments.append(current_comment)
                line_counts.append(line_count)

    if not files:
        print("No CSV files found. Skipping README update.")
        return

    readme = ''

    # Write the README.md preamble (files is guaranteed non-empty here)
    readme += textwrap.dedent(f'''
        ## This Dataset's Contributions

        **Name:** {' / '.join(set(names))}
        <br>
        **Email:** {' / '.join(set(emails))}
        ''')

    for i, comment in enumerate(comments):
        readme += textwrap.dedent(f'''
            ```
            File: {files[i]}
            Datapoints: {line_counts[i]}
            Comment: {comment}
            ```
            ''')

    readme += textwrap.dedent(f'''
        **Last updated:** {datetime.datetime.now().strftime("%m-%d-%Y %I:%M%p").lower()}
        ''')

    readme += textwrap.dedent('''
        ## The ULTERA Database
        This template repository was developed for contributing to the [**ULTERA Database**](https://ultera.org) carried out under the [**DOE ARPA-E ULTIMATE program**](https://arpa-e.energy.gov/?q=arpa-e-programs/ultimate), which aims to develop a new generation of materials for turbine blades in gas turbines and related applications.

        The main scope of this dataset is collecting data on compositionally complex alloys (CCAs), also known as high entropy alloys (HEAs) and multi-principal-element alloys (MPEAs), with extra attention given to (1) high-temperature (refractory) mechanical data and (2) phases present under different processing conditions. Although low-entropy alloys (incl. binaries) are typically not presented to the end user (or counted in statistics), some are present and used in ML efforts; thus **all high-quality alloy data contributions are welcome!**

        For further information, please visit the [ULTERA-contribute](https://github.com/PhasesResearchLab/ULTERA-contribute/) repository.
        ''')

    with open(readme_path, 'w') as readme_file:
        readme_file.write(readme)

    print("README.md has been updated with the latest contributions.")


if __name__ == '__main__':
    update_readme()
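For context, `csv2readme.py` relies on the fixed layout that `excel2csv.py` writes: five metadata rows (Name, Email, Direct Fetched, Hand Fetched, Comment), a blank row, the column labels on row 7, and data from row 8 onward, which is why rows with index `i >= 7` are counted as datapoints. A minimal sketch of such a file (all values are hypothetical and the column labels are abbreviated):

```csv
Name:,Jane Doe
Email:,jane.doe@example.com
Direct Fetched:,12
Hand Fetched:,3
Comment:,Example contribution

Composition,Structure,Processing
Al0.5CoCrFeNi,FCC,AC
NbTaTiZr,BCC,AC
```

For this file, the script would record `Datapoints: 2` in the generated README entry.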
@@ -1,83 +1,86 @@
# %%
import pandas as pd
import sys
import json
import os


def convert(datasheet: str):
    '''This function converts a PyQAlloy-compliant Excel datasheet into a CSV file for the purpose of
    tracking changes in the data collection and curation, while preserving the original template/datasheet
    file along with its style and formatting. The CSV file is named after the original datasheet file, with
    the extension changed to .csv. The metadata is stored in the first few lines of the CSV file, and the
    data is stored in the rest of the file.

    Args:
        datasheet: Path to PyQAlloy-compliant Excel datasheet file.
    '''

    # Import metadata
    print('Reading the metadata.')
    metaDF = pd.read_excel(datasheet,
                           usecols="A:F",
                           nrows=4)
    meta = metaDF.to_json(orient="split")
    metaParsed = json.loads(meta, strict=False)['data']

    # Format metadata into a dictionary
    metaData = {
        'Name': metaParsed[0][1],
        'Email': metaParsed[1][1],
        'Direct Fetched': metaParsed[2][1],
        'Hand Fetched': metaParsed[3][1],
        'Comment': metaParsed[0][5]
    }

    # Derive the output file name from the datasheet name
    dataFileName = datasheet.replace('.xlsx', '').replace('.xls', '')

    # Import data
    print('Importing data.')
    df2 = pd.read_excel(datasheet,
                        usecols="A:N",
                        nrows=20000,
                        skiprows=8)
    # Convert the dataset
    parsed = df2.to_json(orient="split")
    labels = json.loads(parsed, strict=False)['columns']
    data = json.loads(parsed, strict=False)['data']

    print('Imported ' + str(len(data)) + ' datapoints.')

    with open(dataFileName + '.csv', 'w+') as outFile:
        # Write the metadata
        for line, val in metaData.items():
            outFile.write(line + ':,' + str(val) + '\n')
        outFile.write('\n')
        # Write the data
        outFile.write(','.join(labels) + '\n')
        for line in data:
            outFile.write(','.join(str(val) for val in line) + '\n')

    print('Successfully converted ' + datasheet + ' to ' + dataFileName + '.csv\n')


def detectDatasheetsAndConvert(path: str):
    '''This function detects all PyQAlloy-compliant Excel datasheets in a directory and converts them into
    CSV files. It skips the empty template files (template_v4.xlsx and template_v4_DatasetExample.xlsx).

    Args:
        path: Path to the directory containing PyQAlloy-compliant Excel datasheets.
    '''

    for file in os.listdir(path):
        if file.endswith('.xlsx'):
            if file not in ['template_v4.xlsx', 'template_v4_DatasetExample.xlsx']:
                print('Converting ' + file)
                convert(path + '/' + file)
            else:
                print('Skipping ' + file)


if __name__ == '__main__':
    detectDatasheetsAndConvert(sys.argv[1])
import pandas as pd
import fnmatch
import sys
import json
import os


def convert(datasheet: str):
    '''This function converts an ULTERA-compliant Excel datasheet into a CSV file for the purpose of
    tracking changes in the data collection and curation, while preserving the original template/datasheet
    file along with its style and formatting. The CSV file is named after the original datasheet file, with
    the extension changed to .csv. The metadata is stored in the first few lines of the CSV file, and the
    data is stored in the rest of the file.

    Args:
        datasheet: Path to ULTERA-compliant Excel datasheet file.
    '''

    # Import metadata
    print('Reading the metadata.')
    metaDF = pd.read_excel(datasheet,
                           usecols="A:F",
                           nrows=4)
    meta = metaDF.to_json(orient="split")
    metaParsed = json.loads(meta, strict=False)['data']

    # Format metadata into a dictionary
    metaData = {
        'Name': metaParsed[0][1],
        'Email': metaParsed[1][1],
        'Direct Fetched': metaParsed[2][1],
        'Hand Fetched': metaParsed[3][1],
        'Comment': metaParsed[0][5]
    }

    # Derive the output file name from the datasheet name
    dataFileName = datasheet.replace('.xlsx', '').replace('.xls', '')

    # Import data
    print('Importing data.')
    df2 = pd.read_excel(datasheet,
                        usecols="A:N",
                        nrows=20000,
                        skiprows=8)
    # Convert the dataset
    parsed = df2.to_json(orient="split")
    labels = json.loads(parsed, strict=False)['columns']
    data = json.loads(parsed, strict=False)['data']

    print('Imported ' + str(len(data)) + ' datapoints.')

    # Ensure the output directory exists
    output_dir = '.github/excel2csv'
    os.makedirs(output_dir, exist_ok=True)

    with open(f'{output_dir}/{dataFileName}.csv', 'w+') as outFile:
        # Write the metadata
        for line, val in metaData.items():
            outFile.write(line + ':,' + str(val) + '\n')
        outFile.write('\n')
        # Write the data
        outFile.write(','.join(labels) + '\n')
        for line in data:
            outFile.write(','.join(str(val) for val in line) + '\n')

    print(f'Successfully converted {datasheet} to {output_dir}/{dataFileName}.csv\n')


def detectDatasheetsAndConvert(path: str):
    '''This function detects all ULTERA-compliant Excel datasheets in a directory and converts them into
    CSV files. It skips template files (any file matching template*.xlsx).

    Args:
        path: Path to the directory containing ULTERA-compliant Excel datasheets.
    '''

    for file in os.listdir(path):
        if file.endswith('.xlsx'):
            if not fnmatch.fnmatch(file, 'template*.xlsx'):
                print('Converting ' + file)
                convert(path + '/' + file)
            else:
                print('Skipping ' + file)


if __name__ == '__main__':
    detectDatasheetsAndConvert(sys.argv[1])
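The postcommit workflow below invokes this script over the repository root; for a quick local check, a single datasheet can also be converted directly from a Python session. A minimal sketch, assuming `pandas` and `openpyxl` are installed, the script's directory is importable, and `MyDatasheet.xlsx` is a placeholder name:

```python
# Hypothetical local run; mirrors the workflow's "python excel2csv.py ." step
# but converts one file directly instead of scanning a directory.
from excel2csv import convert

# Writes .github/excel2csv/MyDatasheet.csv, creating the directory if needed.
convert('MyDatasheet.xlsx')
```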
19 changes: 19 additions & 0 deletions .github/workflows/newfork.yml
@@ -0,0 +1,19 @@
name: Fork Notification
on: fork

jobs:
  create-fork-issue:
    runs-on: ubuntu-latest
    permissions:
      issues: write
    steps:
      - uses: actions/github-script@v6
        with:
          script: |
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: 'New Contribute Fork Created',
              body: `A new fork of this repository has been created by @${context.actor}\n\n@PhasesResearchLab/ultera-maintainers`,
              labels: ['new fork']
            });
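The `github-script` step above is a thin wrapper around the REST API's create-issue endpoint, so the same notification could be posted by hand. A sketch, assuming a token with issue-write access in the `GITHUB_TOKEN` environment variable; `OWNER/REPO` and `@some-user` are placeholders:

```python
# Hypothetical manual equivalent of the workflow's github-script call.
import os
import requests

resp = requests.post(
    'https://api.github.com/repos/OWNER/REPO/issues',
    headers={'Authorization': f"Bearer {os.environ['GITHUB_TOKEN']}"},
    json={
        'title': 'New Contribute Fork Created',
        'body': 'A new fork of this repository has been created by @some-user\n\n'
                '@PhasesResearchLab/ultera-maintainers',
        'labels': ['new fork'],
    },
)
resp.raise_for_status()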
@@ -1,32 +1,36 @@
name: excel2csv

on: [push]

jobs:
  run:
    name: excel2csv
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
          cache: 'pip'
          cache-dependency-path: 'pyqalloy-contribute/requirements.txt'

      - name: Install Dependencies
        run: |
          python -m pip install -r pyqalloy-contribute/requirements.txt

      - name: Run excel2csv
        run: |
          python pyqalloy-contribute/pyqalloy-contribute/excel2csv.py .

      - name: Commit changes with Add & Commit
        uses: EndBug/add-and-commit@v9
        with:
          message: '(automatic) excel2csv Action for Data Tracking'
          add: '*.csv'
name: postcommit

on: [push]

jobs:
  run:
    name: postcommit
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install Dependencies
        run: |
          python -m pip install pandas openpyxl

      - name: Run excel2csv
        run: |
          python .github/workflows/excel2csv.py .

      - name: Run csv2readme
        run: |
          python .github/workflows/csv2readme.py .

      - name: Commit changes with Add & Commit
        uses: EndBug/add-and-commit@v9
        with:
          message: '(automatic) Action for Data Tracking'
          add: |
            .github/excel2csv/*.csv
            README.md
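To reproduce the workflow's two script steps locally before pushing, the same commands can be replayed from the repository root. A sketch, assuming `pandas` and `openpyxl` are installed, just as the workflow sets up:

```python
# Hypothetical local replay of the "Run excel2csv" and "Run csv2readme"
# steps; the trailing '.' mirrors the workflow invocation.
import subprocess

subprocess.run(['python', '.github/workflows/excel2csv.py', '.'], check=True)
subprocess.run(['python', '.github/workflows/csv2readme.py', '.'], check=True)
```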