diff --git a/backend/oasst_backend/tree_manager.py b/backend/oasst_backend/tree_manager.py
index 45fcc69555..5297cde8d0 100644
--- a/backend/oasst_backend/tree_manager.py
+++ b/backend/oasst_backend/tree_manager.py
@@ -1085,7 +1085,7 @@ def _query_need_review(
 
     def query_prompts_need_review(self, lang: str) -> list[Message]:
         """
-        Select initial prompt messages with less then required rankings in active message tree
+        Select initial prompt messages with less than required rankings in active message tree
         (active == True in message_tree_state)
         """
         return self._query_need_review(
@@ -1094,7 +1094,7 @@ def query_prompts_need_review(self, lang: str) -> list[Message]:
 
     def query_replies_need_review(self, lang: str) -> list[Message]:
         """
-        Select child messages (parent_id IS NOT NULL) with less then required rankings
+        Select child messages (parent_id IS NOT NULL) with less than required rankings
         in active message tree (active == True in message_tree_state)
         """
         return self._query_need_review(message_tree_state.State.GROWING, self.cfg.num_reviews_reply, False, lang)
diff --git a/data/datasets/TSSB-3M/generate_dataset.py b/data/datasets/TSSB-3M/generate_dataset.py
index ec785e83bd..e182d79fd4 100644
--- a/data/datasets/TSSB-3M/generate_dataset.py
+++ b/data/datasets/TSSB-3M/generate_dataset.py
@@ -117,7 +117,7 @@ def clean(text):
 
 
 def clean_PII(text):
-    # Remove sign-off messege generated by `git commit --signoff`, eg. "Signed-off-by: user_name "
+    # Remove sign-off message generated by `git commit --signoff`, e.g. "Signed-off-by: user_name "
     signoff_index = text.rfind("\n\nSigned-off-by:")
     if signoff_index != -1:
         # Remove the sign-off string from the commit message
diff --git a/data/datasets/instructional_codesearchnet_python/Summarize_codesearchnet_for_python.ipynb b/data/datasets/instructional_codesearchnet_python/Summarize_codesearchnet_for_python.ipynb
index 0610cfb00d..c392c33df2 100644
--- a/data/datasets/instructional_codesearchnet_python/Summarize_codesearchnet_for_python.ipynb
+++ b/data/datasets/instructional_codesearchnet_python/Summarize_codesearchnet_for_python.ipynb
@@ -26,7 +26,7 @@
     "id": "K9sCPQzIb278"
    },
    "source": [
-    "### DOWLOAD THE DATASET"
+    "### DOWNLOAD THE DATASET"
    ]
   },
   {
@@ -156,7 +156,7 @@
     "id": "3MxfnNxX2n0m"
    },
    "source": [
-    "### GENERATE THE SUMMARIES AND ANOTATE THE DATASET"
+    "### GENERATE THE SUMMARIES AND ANNOTATE THE DATASET"
    ]
   },
   {
diff --git a/data/datasets/recipes/tasty_recipes.ipynb b/data/datasets/recipes/tasty_recipes.ipynb
index b064172c92..cacb6acdb6 100644
--- a/data/datasets/recipes/tasty_recipes.ipynb
+++ b/data/datasets/recipes/tasty_recipes.ipynb
@@ -158,7 +158,7 @@
     "        for i, instruction in enumerate(ingredient_and_instructions[row[\"slug\"]][\"instructions\"]):\n",
     "            instructions += f\"\\n{i+1}. {convert_fraction_unicode_chars_to_strings(instruction['display_text'])}\"\n",
     "\n",
-    "        # Constuct the full response\n",
+    "        # Construct the full response\n",
     "        response = f\"\"\"Here's a recipe for {recipe_name}:\n",
     "\n",
     "Ingredients:\n",
diff --git a/data/datasets/safety_directory/emergency_infos/wikipedia_emergency_info.js b/data/datasets/safety_directory/emergency_infos/wikipedia_emergency_info.js
index a36cd5a5d3..f0feca9ad5 100644
--- a/data/datasets/safety_directory/emergency_infos/wikipedia_emergency_info.js
+++ b/data/datasets/safety_directory/emergency_infos/wikipedia_emergency_info.js
@@ -1,5 +1,5 @@
 /**
- * Developper console script used to generate the associated json file.
+ * Developer console script used to generate the associated json file.
  * Wikipedia URL : https://en.wikipedia.org/wiki/List_of_suicide_crisis_lines
  * Author : Lucas Oulieu
  */
diff --git a/data/datasets/tv_dialogue/README.md b/data/datasets/tv_dialogue/README.md
index 2df7d03d9d..d607c2952e 100644
--- a/data/datasets/tv_dialogue/README.md
+++ b/data/datasets/tv_dialogue/README.md
@@ -47,7 +47,7 @@ How's it going?
 on Huggingface! They are examples
 on Huggingface.
 
-CUT OUT TO ANOTHER SCENCE
+CUT OUT TO ANOTHER SCENE
 We are somewhere else
 [PERSON 1 (v.o)]
 I wonder where we are?
diff --git a/docs/docs/architecture/inference.md b/docs/docs/architecture/inference.md
index 02daf6cc6c..3b6c83d104 100644
--- a/docs/docs/architecture/inference.md
+++ b/docs/docs/architecture/inference.md
@@ -111,7 +111,7 @@ The inference server is built around [FastAPI](https://fastapi.tiangolo.com/).
    for any other currently pending messages in the chat to
    `inference.MessageState.cancelled`.
 3. After updating the `message` table, we create a RedisQueue for this
-   specific message and enque the message.
+   specific message and enqueue the message.
 4. Finally, we return an `inference.MessageRead` (a Pydantic model) to the
    client. This is the object contains the needed `message_id`.
 
diff --git a/inference/server/oasst_inference_server/compliance.py b/inference/server/oasst_inference_server/compliance.py
index 0950a9cc8b..625ee3af9c 100644
--- a/inference/server/oasst_inference_server/compliance.py
+++ b/inference/server/oasst_inference_server/compliance.py
@@ -57,7 +57,7 @@ async def run_compliance_check(websocket: fastapi.WebSocket, worker_id: str, wor
     Run a compliance check for the given worker:
     - Find a suitable compliance check assistant message
     - Task the worker with generating a response with the same context
-    - Compare the respons against the existing completed message
+    - Compare the response against the existing completed message
     - Update the database with the outcome
     """
     async with deps.manual_create_session() as session:
diff --git a/model/model_eval/manual/create_synth_import.py b/model/model_eval/manual/create_synth_import.py
index 5f5760765d..2572cd6359 100644
--- a/model/model_eval/manual/create_synth_import.py
+++ b/model/model_eval/manual/create_synth_import.py
@@ -83,7 +83,7 @@ def main():
                 reply_texts.add(m.text)
 
         if len(unique_replies) < 2:
-            print("Skipping enty with < 2 unique replies")
+            print("Skipping entry with < 2 unique replies")
             continue
 
         prompt_message = ExportMessageNode(
diff --git a/model/model_training/custom_datasets/formatting.py b/model/model_training/custom_datasets/formatting.py
index a5bfdc7394..e3c7ec2e29 100644
--- a/model/model_training/custom_datasets/formatting.py
+++ b/model/model_training/custom_datasets/formatting.py
@@ -74,7 +74,7 @@ def system_tag(
 
         shuffle(properties)
 
-        # ensure that potentially multi-line conext field comes last
+        # ensure that potentially multi-line context field comes last
         if self.context:
             properties.append(("context", self.context))
 
diff --git a/model/model_training/models/__init__.py b/model/model_training/models/__init__.py
index 8ada11ab17..44c0968ee4 100644
--- a/model/model_training/models/__init__.py
+++ b/model/model_training/models/__init__.py
@@ -2,7 +2,7 @@
 
 
 def freeze_top_n_layers(model, target_layers):
-    # its possible we can simply detect which module is a ModuleList
+    # it's possible we can simply detect which module is a ModuleList
     # and simply freeze the module without doing string parsing
     for name, param in model.named_parameters():
         if "embed" in name:
diff --git a/model/model_training/models/patching_falcon.py b/model/model_training/models/patching_falcon.py
index 292c9aa13a..dbf6e8de28 100644
--- a/model/model_training/models/patching_falcon.py
+++ b/model/model_training/models/patching_falcon.py
@@ -19,7 +19,7 @@ def falcon_forward_with_flash_attn(
 ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
     """
     head_mask, alibi & output_attention are not supported.
-    Reference to the original `FalconAttention.forwad()` method which this patch replaces:
+    Reference to the original `FalconAttention.forward()` method which this patch replaces:
     https://github.com/huggingface/transformers/blob/c965d302791cf935d6ea7776428749be678cf509/src/transformers/models/falcon/modeling_falcon.py#L281
     """
 
diff --git a/notebooks/TSSB-3M-bugs-dataset/TSSB-3M-bugs_dataset.ipynb b/notebooks/TSSB-3M-bugs-dataset/TSSB-3M-bugs_dataset.ipynb
index a5577e3c86..a1bf0e25a6 100644
--- a/notebooks/TSSB-3M-bugs-dataset/TSSB-3M-bugs_dataset.ipynb
+++ b/notebooks/TSSB-3M-bugs-dataset/TSSB-3M-bugs_dataset.ipynb
@@ -707,10 +707,10 @@
     "\n",
     "g = Github()\n",
     "\n",
-    "# TO DO, find a way to get a commmit from SHA\n",
+    "# TO DO, find a way to get a commit from SHA\n",
     "# 1. Use GitHub API\n",
     "# 2. Download repos with their history\n",
-    "# 3. Web scaping"
+    "# 3. Web scraping"
    ]
   },
   {
diff --git a/notebooks/data-augmentation/essay-revision/essay-revision.ipynb b/notebooks/data-augmentation/essay-revision/essay-revision.ipynb
index 667a5cd597..3c22fb7778 100644
--- a/notebooks/data-augmentation/essay-revision/essay-revision.ipynb
+++ b/notebooks/data-augmentation/essay-revision/essay-revision.ipynb
@@ -201,7 +201,7 @@
    },
    "outputs": [],
    "source": [
-    "# Make grammar erros (more like: change random words into words of similar meaning)\n",
+    "# Make grammar errors (more like: change random words into words of similar meaning)\n",
     "import nltk\n",
     "from nltk.corpus import wordnet\n",
     "import random\n",
diff --git a/notebooks/data-augmentation/unified-qa/unified-qa.ipynb b/notebooks/data-augmentation/unified-qa/unified-qa.ipynb
index 95995d5e7e..56478a306f 100644
--- a/notebooks/data-augmentation/unified-qa/unified-qa.ipynb
+++ b/notebooks/data-augmentation/unified-qa/unified-qa.ipynb
@@ -1004,7 +1004,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "random.seed(20)  # for reproduciablity"
+    "random.seed(20)  # for reproducibility"
    ]
   },
   {
@@ -1038,7 +1038,7 @@
     "    answer = item.Answer\n",
     "    if question == np.nan or answer == np.nan:\n",
     "        print(\"Skipped\")\n",
-    "    # get a random conversation generatore function\n",
+    "    # get a random conversation generator function\n",
     "    conv_func = random.choice(conv_funcs)\n",
     "    try:\n",
     "        conv_list = conv_func(question, answer)\n",
diff --git a/notebooks/data-augmentation/wikidata-qa/wikidata.ipynb b/notebooks/data-augmentation/wikidata-qa/wikidata.ipynb
index 7a6662b16d..b4403f987e 100644
--- a/notebooks/data-augmentation/wikidata-qa/wikidata.ipynb
+++ b/notebooks/data-augmentation/wikidata-qa/wikidata.ipynb
@@ -810,7 +810,7 @@
     "        \"{sub} is used mostly for {a}.\",\n",
     "        \"{name} is mostly known for {a}.\",\n",
     "    ],\n",
-    "    \"P487\": [\"{a}\", \"The {name} emoji is {a}.\", \"The {a} character repesents {name}.\"],\n",
+    "    \"P487\": [\"{a}\", \"The {name} emoji is {a}.\", \"The {a} character represents {name}.\"],\n",
     "    \"P509\": [\"{name} died of {a}.\", \"The cause of {pos} death was {a}.\"],\n",
     "    \"P527\": [\"{name} are made of {a}.\", \"They are made of {a}.\"],\n",
     "    \"P569\": [\"{name} was born on {a}.\", \"{pos} birthday is on the {a}.\"],\n",
@@ -828,12 +828,12 @@
     "    ],\n",
     "    \"P580\": [\"{name} started in {a}.\", \"{name} first started at {a}.\"],\n",
     "    \"P582\": [\"{name} ended in {a}.\", \"{name} lasted until {a}.\"],\n",
-    "    \"P625\": [\"{name} is lcoated at {a}.\", \"The coordinates for {name} are {a}.\", \"{pos} GPS location is {a}.\"],\n",
+    "    \"P625\": [\"{name} is located at {a}.\", \"The coordinates for {name} are {a}.\", \"{pos} GPS location is {a}.\"],\n",
     "    \"P837\": [\"{name} is celebrated on {a}.\", \"{name} is on {a}.\"],\n",
     "    \"P856\": [\n",
     "        \"The URL for {name} is: {a}\",\n",
     "        \"See {a}\",\n",
-    "        \"The URL of {pos} webiste is {a}\",\n",
+    "        \"The URL of {pos} website is {a}\",\n",
     "        \"{pos} web address is: {a}\",\n",
     "    ],\n",
     "    \"P973\": [\n",
@@ -855,7 +855,7 @@
     "    \"P2043\": [\"{name} is {a} long.\", \"{sub} has a length of {a}.\"],\n",
     "    \"P2044\": [\"{name} is {a} tall.\", \"{name} is {a} above sea level.\", \"{pos} elevation is {a}.\"],\n",
     "    \"P2046\": [\"{name}'s area is {a}\", \"{pos} area is {a}.\"],\n",
-    "    \"P2049\": [\"{name}'s widht is {a}.\", \"{name} is {a} wide.\"],\n",
+    "    \"P2049\": [\"{name}'s width is {a}.\", \"{name} is {a} wide.\"],\n",
     "    \"P2250\": [\"{name} have a life expectancy of {a}.\", \"{pos} life expectancy is about {a}.\"],\n",
     "    \"P2283\": [\n",
     "        \"{name} uses {a} to work.\",\n",
@@ -887,20 +887,20 @@
     "        \"{pos} {l} children are {a}.\",\n",
     "    ],\n",
     "    \"P50\": [\"{name} was co-written by {a}.\", \"The authors of {name} are {a}.\"],\n",
-    "    \"P57\": [\"{name} was direcrted by the following people: {a}.\", \"{a} were the directors of {name}.\"],\n",
+    "    \"P57\": [\"{name} was directed by the following people: {a}.\", \"{a} were the directors of {name}.\"],\n",
     "    \"P61\": [\"{pos} inventors are {a}.\", \"{name} was discovered by {a}.\"],\n",
     "    \"P106\": [\"{name} has multiple occupations: {a}.\", \"{name}'s job titles are: {a}.\"],\n",
     "    \"P169\": [\"{name} is the CEO of multiple companies, such as {a}.\", \"{sub} is the CEO at {a}.\"],\n",
     "    \"P225\": [\"The taxon names for {name} are {a}.\", \"The proper scientific terms for {name} are {a}.\"],\n",
     "    \"P246\": [\"The elements of {name} are {a}.\", \"The symbols for {name} are {a}.\"],\n",
     "    \"P274\": [\"The formulas for {name} are {a}.\", \"The chemical formulas of the compound {name} are {a}.\"],\n",
-    "    \"P487\": [\"The {name} emojis are {a}.\", \"The characters {a} repesent {name}.\"],\n",
+    "    \"P487\": [\"The {name} emojis are {a}.\", \"The characters {a} represent {name}.\"],\n",
     "    \"P527\": [\"The ingredients of {name} are {a}.\", \"{a} are all parts needed for {name}.\"],\n",
     "    \"P575\": [\n",
     "        \"Sources disagree on the exact date, it is said that {name} was invented in {a}.\",\n",
     "        \"{name} was discovered multiple times at {a}.\",\n",
     "    ],\n",
-    "    \"P856\": [\"The URLs for {name} are: {a}\", \"See {a}\", \"The URLs of {pos} webiste are {a}\"],\n",
+    "    \"P856\": [\"The URLs for {name} are: {a}\", \"See {a}\", \"The URLs of {pos} website are {a}\"],\n",
     "    \"P625\": [\n",
     "        \"{name} can be found under the following GPS locations: {a}.\",\n",
     "        \"The coordinates for {name} are {a}.\",\n",
diff --git a/notebooks/detoxify-evaluation/detoxify-evaluation.ipynb b/notebooks/detoxify-evaluation/detoxify-evaluation.ipynb
index 5a380ea967..26f4344b20 100644
--- a/notebooks/detoxify-evaluation/detoxify-evaluation.ipynb
+++ b/notebooks/detoxify-evaluation/detoxify-evaluation.ipynb
@@ -327,11 +327,11 @@
     "\n",
     "| Model name | Not obviously toxic| Not obviously non-toxic | Obviously toxic| Obviously non-toxic|\n",
     "| :---: | :---: | :---: | :---: | :---: |\n",
-    "|original| failed at all, easily accepted racist, sexist overally toxic prompts that were well formulated |Very sensitive on swear words, failed to reckognize context| good performance|good performance|\n",
-    "|unbiased|Managed to find some hidden toxicity but not on all sentences| Very sensitive explicit language but shown ability to recognize context| Did well but failed to reckognize some gender stereotype mockery | good performance\n",
-    "|multilingual|Managed to find some hidden toxicity but not on all sentences| Very sensitive explicit language but shown ability to recognize context| Did well but failed to reckognize some gender stereotype mockery | good performance\n",
+    "|original| failed completely, easily accepted racist, sexist, overly toxic prompts that were well formulated |Very sensitive to swear words, failed to recognize context| good performance|good performance|\n",
+    "|unbiased|Managed to find some hidden toxicity but not on all sentences| Very sensitive to explicit language but showed the ability to recognize context| Did well but failed to recognize some gender stereotype mockery | good performance\n",
+    "|multilingual|Managed to find some hidden toxicity but not on all sentences| Very sensitive to explicit language but showed the ability to recognize context| Did well but failed to recognize some gender stereotype mockery | good performance\n",
     "\n",
-    "Subjectivly 'unbiased' looks like the best performing model. \n",
+    "Subjectively 'unbiased' looks like the best performing model. \n",
     "\n",
     "I don't think it would do well as a security layer in a live version of open assistant unless we do some finetuning first, because it can be fooled to pass toxicity if it's presented in formal language. \n",
     "\n",
diff --git a/website/src/lib/oasst_api_client.ts b/website/src/lib/oasst_api_client.ts
index 61b7a27cec..71650d35d0 100644
--- a/website/src/lib/oasst_api_client.ts
+++ b/website/src/lib/oasst_api_client.ts
@@ -230,7 +230,7 @@ export class OasstApiClient {
   }
 
   /**
-   * Modify a message's content and save it's previous content as a revision
+   * Modify a message's content and save its previous content as a revision
    */
   async edit_message(message_id: string, user: BackendUserCore, new_content: string) {
     return this.post(`/api/v1/messages/${message_id}/edit`, {