Introduce encoder

* Add .gitignore * Add Makefile * Add test cases for encoder * Add test script for encoder * Add encoder.c and encoder.h * Add bminor.c
sghuang19 · Oct 11, 2023 · 2ea8a71 · 2ea8a71
1 parent 4504f60
commit 2ea8a71
Show file tree

Hide file tree

Showing 26 changed files with 375 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+.vscode
+build
+cmake-build-debug
+tags
+
+bminor
+*.o
+
+test/**/*.out
diff --git a/Makefile b/Makefile
@@ -0,0 +1,12 @@
+# Build rules for the bminor executable and its encoder object file.
+CC = gcc
+.DEFAULT_GOAL = bminor
+
+# clean names an action, not a file; without this a file called "clean"
+# would make the target appear up to date and the rule would never run.
+.PHONY: clean
+
+encoder.o: encoder.c
+	$(CC) -c $< -o $@
+
+# bminor.c includes encoder.h, so a header change must trigger a rebuild.
+# The header is filtered out of the command line: it is a dependency only.
+bminor: bminor.c encoder.o encoder.h
+	$(CC) $(filter-out %.h,$^) -o $@
+
+clean:
+	rm -f *.o
+	rm -f bminor
diff --git a/bminor.c b/bminor.c
@@ -0,0 +1,55 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "encoder.h"
+
+/* Print the usage banner to stdout, then terminate the process with exit_code. */
+void usage(int exit_code)
+{
+	fputs("Usage of bminor.\n", stdout);
+	exit(exit_code);
+}
+
+/*
+ * Entry point: scan the command line for options.
+ *
+ *   --help            print usage and exit with status 0
+ *   --encode <file>   run decode() on <file> and exit with its outcome
+ *
+ * With no arguments, or with an unrecognised option, usage is printed and the
+ * process exits with status 1. Arguments that do not start with '-' are
+ * skipped.
+ */
+int main(int argc, char* argv[])
+{
+	if (argc == 1)
+		usage(1);
+
+	for (int i = 1; i < argc; i++)
+	{
+		const char* arg = argv[i];
+
+		// Only option-style arguments are interpreted
+		if (arg[0] != '-')
+			continue;
+
+		if (strcmp(arg, "--help") == 0)
+		{
+			usage(0);
+			continue;
+		}
+
+		if (strcmp(arg, "--encode") != 0)
+		{
+			fprintf(stderr, "Unknown option '%s'\n", arg);
+			usage(1);
+			continue;
+		}
+
+		// argv[argc] is NULL, so a missing operand shows up as a NULL filename
+		char* filename = argv[++i];
+		if (filename == NULL)
+		{
+			fprintf(stderr, "Missing filename to be encoded\n");
+			return EXIT_FAILURE;
+		}
+
+		if (decode(filename) != 0)
+		{
+			fprintf(stderr, "Failed to decode file %s\n", filename);
+			return EXIT_FAILURE;
+		}
+		return EXIT_SUCCESS;
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/encoder.c b/encoder.c
@@ -0,0 +1,256 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define MAX_STRING_LEN 255
+
+/* Return 1 when c is an ASCII hexadecimal digit (0-9, A-F, a-f), otherwise 0. */
+int is_hex(char c)
+{
+	if (c >= '0' && c <= '9')
+		return 1;
+	if (c >= 'A' && c <= 'F')
+		return 1;
+	return (c >= 'a' && c <= 'f');
+}
+
+/*
+ * Decode a double-quoted, backslash-escaped string literal into raw characters.
+ *
+ * es: NUL-terminated encoded text; must start and end with '"'.
+ * s:  output buffer of at least MAX_STRING_LEN + 1 bytes. On success it holds
+ *     the decoded characters followed by a terminating '\0'.
+ *
+ * Every source character must be printable ASCII (32..126). Recognised
+ * escapes are \a \b \e \f \n \r \t \v \\ \' \" and \0xHH, where HH is two hex
+ * digits with a value of at most 0x7F. The decoded text may hold at most
+ * MAX_STRING_LEN characters.
+ *
+ * Returns 0 on success. On failure a diagnostic is written to stderr, 1 is
+ * returned, and the contents of s are unspecified.
+ */
+int string_decode(const char* es, char* s)
+{
+	// Check for start and end quotes
+	size_t es_len = strlen(es);
+	if (es_len < 2 || es[0] != '"' || es[es_len - 1] != '"')
+	{
+		fprintf(stderr, "Invalid string: does not start or end with a quote\n");
+		return 1;
+	}
+
+	const char* es_end = es + es_len - 1; // the closing quote
+	const char* es_ptr = es + 1;
+	size_t len = 0;
+
+	while (es_ptr < es_end)
+	{
+		// Check current length of decoded string
+		if (len >= MAX_STRING_LEN)
+		{
+			fprintf(stderr, "Invalid string: too long\n");
+			return 1;
+		}
+
+		// Check for printable characters
+		if (*es_ptr < 32 || *es_ptr > 126)
+		{
+			fprintf(stderr, "Invalid string: invalid character\n");
+			return 1;
+		}
+
+		// Ordinary character: copy through unchanged
+		if (*es_ptr != '\\')
+		{
+			s[len++] = *es_ptr++;
+			continue;
+		}
+
+		// Escape sequence: es_ptr now moves onto the character after the
+		// backslash, which is at worst the closing quote.
+		es_ptr++;
+		char decoded;
+		switch (*es_ptr)
+		{
+		case 'a':
+			decoded = '\a';
+			break;
+		case 'b':
+			decoded = '\b';
+			break;
+		case 'e':
+			decoded = 27;
+			break;
+		case 'f':
+			decoded = '\f';
+			break;
+		case 'n':
+			decoded = '\n';
+			break;
+		case 'r':
+			decoded = '\r';
+			break;
+		case 't':
+			decoded = '\t';
+			break;
+		case 'v':
+			decoded = '\v';
+			break;
+		case '\\':
+			decoded = '\\';
+			break;
+		case '\'':
+			decoded = '\'';
+			break;
+		case '"':
+			// The backslash swallowed the closing quote, so the literal
+			// is not actually terminated.
+			if (es_ptr == es_end)
+			{
+				fprintf(stderr, "Invalid string: escape sequence at end of string\n");
+				return 1;
+			}
+			decoded = '"';
+			break;
+		case '0':
+		{
+			// Only the form \0xHH is accepted. A bare \0 used to be let
+			// through without storing anything and ate the next character.
+			if (es_ptr[1] != 'x')
+			{
+				fprintf(stderr, "Invalid string: invalid escape sequence\n");
+				return 1;
+			}
+			// The closing quote is not a hex digit, so these look-aheads
+			// short-circuit before they can run off the end of es.
+			if (!is_hex(es_ptr[2]) || !is_hex(es_ptr[3]))
+			{
+				fprintf(stderr, "Invalid string: invalid hex escape sequence\n");
+				return 1;
+			}
+
+			// strtol needs a NUL-terminated string
+			char hex[3] = { es_ptr[2], es_ptr[3], '\0' };
+			// Keep the value as long: narrowing to char first would make
+			// the > 127 test meaningless.
+			long val = strtol(hex, NULL, 16);
+			if (val < 0 || val > 127)
+			{
+				fprintf(stderr, "Invalid string: hex character not in ASCII\n");
+				return 1;
+			}
+			decoded = (char)val;
+			es_ptr += 3; // leave es_ptr on the last hex digit
+			break;
+		}
+		default:
+			fprintf(stderr, "Invalid string: invalid escape sequence\n");
+			return 1;
+		}
+
+		es_ptr++;
+		s[len++] = decoded;
+	}
+
+	// len <= MAX_STRING_LEN, so the terminator fits in the documented buffer
+	s[len] = '\0';
+	return 0;
+}
+
+/*
+ * Encode a raw string as a double-quoted literal with backslash escapes.
+ *
+ * s:  NUL-terminated input text.
+ * es: output buffer. No bounds checking is performed, so it must be able to
+ *     hold the worst case of 5 * strlen(s) + 3 bytes (every character written
+ *     as \0xHH, plus both quotes and the terminating '\0').
+ *
+ * Printable ASCII (32..126) is copied through, with '"' and '\' preceded by a
+ * backslash. Bell, backspace, escape (27), form feed, newline, carriage
+ * return, tab and vertical tab use \a \b \e \f \n \r \t \v. Any other byte is
+ * written as \0xHH with exactly two uppercase hex digits.
+ *
+ * Always returns 0.
+ */
+int string_encode(const char* s, char* es)
+{
+	static const char hex_digits[] = "0123456789ABCDEF";
+	char* es_ptr = es;
+
+	*es_ptr++ = '"';
+
+	// s_ptr advances exactly once per input character, in the loop header only
+	for (const char* s_ptr = s; *s_ptr != '\0'; s_ptr++)
+	{
+		// Work on the unsigned value so bytes above 127 are not sign-extended
+		unsigned char c = (unsigned char)*s_ptr;
+
+		// Handle printable characters
+		if (c >= 32 && c <= 126)
+		{
+			if (c == '"' || c == '\\')
+				*es_ptr++ = '\\';
+			*es_ptr++ = (char)c;
+			continue;
+		}
+
+		// Handle escape sequences
+		*es_ptr++ = '\\';
+		switch (c)
+		{
+		case '\a':
+			*es_ptr++ = 'a';
+			break;
+		case '\b':
+			*es_ptr++ = 'b';
+			break;
+		case 27:
+			*es_ptr++ = 'e';
+			break;
+		case '\f':
+			*es_ptr++ = 'f';
+			break;
+		case '\n':
+			*es_ptr++ = 'n';
+			break;
+		case '\r':
+			*es_ptr++ = 'r';
+			break;
+		case '\t':
+			*es_ptr++ = 't';
+			break;
+		case '\v':
+			*es_ptr++ = 'v';
+			break;
+		default:
+			// Always two digits, so the result matches the \0xHH form that
+			// string_decode requires.
+			*es_ptr++ = '0';
+			*es_ptr++ = 'x';
+			*es_ptr++ = hex_digits[c >> 4];
+			*es_ptr++ = hex_digits[c & 0x0F];
+			break;
+		}
+	}
+
+	*es_ptr++ = '"';
+	*es_ptr = '\0';
+	return 0;
+}
+
+/*
+ * Read an encoded string literal from a file, decode it, and print the
+ * re-encoded form followed by a newline to stdout.
+ *
+ * filename: path of the file whose entire content is the quoted literal.
+ *
+ * Returns 0 on success. Returns 1 if the file cannot be opened or measured,
+ * is larger than the longest possible encoding of a MAX_STRING_LEN-character
+ * string, memory cannot be allocated, or string_decode rejects the content.
+ * Diagnostics are written to stderr.
+ */
+int decode(char* filename)
+{
+	FILE* file = fopen(filename, "r");
+	if (file == NULL)
+	{
+		// stderr, like every other diagnostic, so stdout carries only results
+		fprintf(stderr, "Could not open file %s\n", filename);
+		return 1;
+	}
+
+	// Find the size of the file
+	long file_size = -1;
+	if (fseek(file, 0, SEEK_END) == 0)
+		file_size = ftell(file);
+	if (file_size < 0)
+	{
+		perror("Could not determine file size");
+		fclose(file);
+		return 1;
+	}
+	// Longest valid input: every character as a 5-char escape plus two quotes
+	if (file_size > MAX_STRING_LEN * 5 + 2)
+	{
+		fprintf(stderr, "Invalid string: too long\n");
+		fclose(file);
+		return 1;
+	}
+	rewind(file);
+
+	// Allocate memory for the file content
+	// Reserve space for \0
+	char* file_content = malloc((size_t)file_size + 1);
+	if (file_content == NULL)
+	{
+		perror("Could not allocate memory");
+		fclose(file);
+		return 1;
+	}
+
+	// Read the file content into the allocated memory
+	size_t chars_read = fread(file_content, sizeof(char), (size_t)file_size, file);
+	file_content[chars_read] = '\0';
+	fclose(file);
+
+	char s[MAX_STRING_LEN + 1] = { 0 };
+	// Sized for the worst-case encoding (5 chars per decoded character, two
+	// quotes, terminator). A buffer sized by the input length has no room
+	// for the terminating '\0' when the output is as long as the input.
+	char es[MAX_STRING_LEN * 5 + 3] = { 0 };
+
+	int status = 1;
+	if (string_decode(file_content, s) == 0)
+	{
+		string_encode(s, es);
+		printf("%s\n", es);
+		status = 0;
+	}
+
+	free(file_content);
+	return status;
+}
diff --git a/encoder.h b/encoder.h
@@ -0,0 +1,5 @@
+#ifndef ENCODER_H
+#define ENCODER_H
+
+/*
+ * Decode the double-quoted, backslash-escaped literal es into s.
+ * s should be a zero-initialised buffer of at least 256 bytes (the decoder
+ * accepts up to 255 decoded characters).
+ * Returns 0 on success; prints a diagnostic to stderr and returns 1 on failure.
+ */
+int string_decode(const char* es, char* s);
+
+/*
+ * Encode the NUL-terminated string s as a double-quoted literal in es.
+ * No output size is passed, so es must be large enough for the whole result.
+ * Returns 0.
+ */
+int string_encode(const char* s, char* es);
+
+/*
+ * Read the literal stored in filename, decode it, and print its re-encoded
+ * form to stdout. Returns 0 on success and 1 on failure.
+ */
+int decode(char* filename);
+
+#endif /* ENCODER_H */
diff --git a/runtest.sh b/runtest.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# Run the encoder test cases. Files named good*.bminor must be accepted by
+# "bminor --encode"; files named bad*.bminor must be rejected. Output of each
+# run is saved next to the test file with an .out suffix.
+# Exits non-zero if any case behaves incorrectly.
+
+failures=0
+
+# run_group EXPECTED FILE...
+# EXPECTED is "success" or "failure"; each FILE is run and checked against it.
+run_group() {
+  expected=$1
+  shift
+  for testfile in "$@"; do
+    # An unmatched glob is passed through literally; skip it
+    [ -e "$testfile" ] || continue
+
+    # Call the freshly built binary directly so an installed bminor
+    # elsewhere on PATH cannot be picked up instead
+    if ./bminor --encode "$testfile" >"$testfile.out"; then
+      actual=success
+    else
+      actual=failure
+    fi
+
+    if [ "$actual" = "$expected" ]; then
+      echo "$testfile $actual (as expected)"
+    else
+      echo "$testfile $actual (INCORRECT)"
+      failures=$((failures + 1))
+    fi
+  done
+}
+
+run_group success test/encode/good*.bminor
+run_group failure test/encode/bad*.bminor
+
+[ "$failures" -eq 0 ]
diff --git a/test/encode/bad0.bminor b/test/encode/bad0.bminor
diff --git a/test/encode/bad1.bminor b/test/encode/bad1.bminor
@@ -0,0 +1 @@
+"
diff --git a/test/encode/bad2.bminor b/test/encode/bad2.bminor
@@ -0,0 +1 @@
+"a \"
diff --git a/test/encode/bad3.bminor b/test/encode/bad3.bminor
@@ -0,0 +1 @@
+"\\\"
diff --git a/test/encode/bad4.bminor b/test/encode/bad4.bminor
@@ -0,0 +1 @@
+"\ "
diff --git a/test/encode/bad5.bminor b/test/encode/bad5.bminor
@@ -0,0 +1 @@
+"\0x"
diff --git a/test/encode/bad6.bminor b/test/encode/bad6.bminor
@@ -0,0 +1 @@
+" 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789..."
diff --git a/test/encode/bad7.bminor b/test/encode/bad7.bminor
@@ -0,0 +1 @@
+"\"\\\a\0xA "
diff --git a/test/encode/bad8.bminor b/test/encode/bad8.bminor
@@ -0,0 +1 @@
+" \ n \0xAB"
diff --git a/test/encode/bad9.bminor b/test/encode/bad9.bminor
@@ -0,0 +1 @@
+"\ n"
diff --git a/test/encode/good0.bminor b/test/encode/good0.bminor
@@ -0,0 +1 @@
+"Hello \n World!"
diff --git a/test/encode/good1.bminor b/test/encode/good1.bminor
@@ -0,0 +1 @@
+"\a\b\e\f\n\r\t\v\\\'\"and\0x7F s \0x0Aabs"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		" 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789..."