From 9fa68697f51fef69d053b8bb62fbde0d3877dce0 Mon Sep 17 00:00:00 2001
From: Rose
Date: Fri, 9 Aug 2024 12:39:48 -0500
Subject: [PATCH] new string.h implementation & tests

Replace the C string routines from dosfs/tmpstring.c with i386 assembly
implementations in string.nasm, declared in include/string.h.  The div()
and ldiv() helpers that lived in the same file move to dosfs/div.c.

Add a host-side harness under host_test/ that assembles string.nasm,
prefixes its symbols with rose_ and exercises them from a hosted program.

memcmp and strcmp compare bytes as unsigned char and return the
difference of the first mismatching pair widened to int, so the sign of
the result is correct for bytes >= 0x80 as well.

---
Review notes (this section is ignored by git-am):
 * NOTE(review): line breaks, indentation and the <...> header names on
   the #include lines were reconstructed from a whitespace-collapsed copy
   of this mail; confirm the context/removed lines against the tree.
 * NOTE(review): the blob ids on the "index" lines were not recomputed
   after the edits to str_test.c, string.h and string.nasm.

 .gitignore           |   1 +
 Makefile             |   2 +-
 dosfs/div.c          |  15 +++
 dosfs/dosfs.c        |   3 +-
 dosfs/tmpstring.c    |  99 ---------------
 host_test/Makefile   |  24 ++++
 host_test/main.c     |  14 ++
 host_test/str_test.c | 312 +++++++++++++++++++++++++++++++++++++++++++
 include/string.h     |  11 ++
 string.nasm          | 205 ++++++++++++++++++++++++++++
 10 files changed, 585 insertions(+), 101 deletions(-)
 create mode 100644 dosfs/div.c
 delete mode 100644 dosfs/tmpstring.c
 create mode 100644 host_test/Makefile
 create mode 100644 host_test/main.c
 create mode 100644 host_test/str_test.c
 create mode 100644 include/string.h
 create mode 100644 string.nasm

diff --git a/.gitignore b/.gitignore
index e31eb50..36f4b4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
 bx_enh_dbg.ini
 .cache
 compile_commands.json
+host_test/test
diff --git a/Makefile b/Makefile
index c3920a2..0de770a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 objects = entry.o kernel.o task.o handler.o interrupt.o v86.o print.o tss.o gdt.o\
 	paging.o fault.o tests.o kbd.o helper.o disk.o file.o fs.o dosfs/dosfs.o fs_dos.o\
-	progs.o hexedit.o textedit.o
+	progs.o hexedit.o textedit.o string.o
 CFLAGS = -target "i386-elf" -m32 -mgeneral-regs-only -ffreestanding\
 	-march=i386 -fno-stack-protector -Wno-int-conversion -nostdlib -c -Iinclude
 LFLAGS = -Wl,--gc-sections -Wl,--print-gc-sections -m32 -nostartfiles -nostdlib
diff --git a/dosfs/div.c b/dosfs/div.c
new file mode 100644
index 0000000..5e428db
--- /dev/null
+++ b/dosfs/div.c
@@ -0,0 +1,15 @@
+#pragma once
+#include <stdint.h>
+
+/* STDLIB DIV FUNCTIONS */
+typedef struct { int32_t quot, rem; } div_t;
+typedef struct { int64_t quot, rem; } ldiv_t;
+div_t div(int num, int den)
+{
+	return (div_t){ num/den, num%den };
+}
+ldiv_t ldiv(long num, long den)
+{
+	return (ldiv_t){ num/den, num%den };
+}
+
diff --git a/dosfs/dosfs.c b/dosfs/dosfs.c
index 6ce007b..9e3479d 100755
--- a/dosfs/dosfs.c
+++ b/dosfs/dosfs.c
@@ -1,4 +1,5 @@
-#include "tmpstring.c"
+#include <string.h>
+#include "div.c"
 
 /*
 	DOSFS Embedded FAT-Compatible Filesystem
diff --git a/dosfs/tmpstring.c b/dosfs/tmpstring.c
deleted file mode 100644
index 4fc821a..0000000
--- a/dosfs/tmpstring.c
+++ /dev/null
@@ -1,99 +0,0 @@
-#pragma once
-#include <stddef.h>
-#include <stdint.h>
-
-void *memcpy(void *restrict dest, const void *restrict src, size_t n)
-{
-	unsigned char *d = dest;
-	const unsigned char *s = src;
-	for (; n; n--) *d++ = *s++;
-	return dest;
-}
-
-void *memset(void *dest, int c, size_t n)
-{
-	unsigned char *s = dest;
-	size_t k;
-
-	/* Fill head and tail with minimal branching. Each
-	 * conditional ensures that all the subsequently used
-	 * offsets are well-defined and in the dest region. */
-
-	if (!n) return dest;
-	s[0] = c;
-	s[n-1] = c;
-	if (n <= 2) return dest;
-	s[1] = c;
-	s[2] = c;
-	s[n-2] = c;
-	s[n-3] = c;
-	if (n <= 6) return dest;
-	s[3] = c;
-	s[n-4] = c;
-	if (n <= 8) return dest;
-
-	/* Advance pointer to align it at a 4-byte boundary,
-	 * and truncate n to a multiple of 4. The previous code
-	 * already took care of any head/tail that get cut off
-	 * by the alignment. */
-
-	k = -(uintptr_t)s & 3;
-	s += k;
-	n -= k;
-	n &= -4;
-
-	/* Pure C fallback with no aliasing violations. */
-	for (; n; n--, s++) *s = c;
-
-	return dest;
-}
-
-size_t strlen(const char *s)
-{
-	const char *a = s;
-	for (; *s; s++);
-	return s-a;
-}
-
-int memcmp(const void *vl, const void *vr, size_t n)
-{
-	const unsigned char *l=vl, *r=vr;
-	for (; n && *l == *r; n--, l++, r++);
-	return n ? *l-*r : 0;
-}
-
-char *strncpy(char *restrict d, const char *restrict s, size_t n)
-{
-	for (; n && (*d=*s); n--, s++, d++);
-	memset(d, 0, n);
-	return d;
-}
-
-char *strcpy(char *restrict dest, const char *restrict src)
-{
-	char *restrict d = dest;
-	const char *restrict s = src;
-	for (; (*d=*s); s++, d++);
-
-	return d;
-}
-
-int strcmp(const char *l, const char *r)
-{
-	for (; *l==*r && *l; l++, r++);
-	return *(unsigned char *)l - *(unsigned char *)r;
-}
-
-
-/* STDLIB DIV FUNCTIONS */
-typedef struct { int32_t quot, rem; } div_t;
-typedef struct { int64_t quot, rem; } ldiv_t;
-div_t div(int num, int den)
-{
-	return (div_t){ num/den, num%den };
-}
-ldiv_t ldiv(long num, long den)
-{
-	return (ldiv_t){ num/den, num%den };
-}
-
diff --git a/host_test/Makefile b/host_test/Makefile
new file mode 100644
index 0000000..6b6f63c
--- /dev/null
+++ b/host_test/Makefile
@@ -0,0 +1,24 @@
+rose_objects = string.o
+objects = main.o
+CFLAGS = -m32 -mgeneral-regs-only -march=i386 -fno-stack-protector -Wno-int-conversion -c -Iinclude
+LFLAGS = -m32
+
+all: test
+	./test
+
+test: $(objects) $(rose_objects)
+	clang $(LFLAGS) -o $@ $^
+
+%.o: ../%.nasm
+	nasm -f elf32 -o $@ $<
+	objcopy --prefix-symbols=rose_ $@
+
+%.o: %.c
+	clang $(CFLAGS) -ffunction-sections -fdata-sections -Os -o $@ $<
+
+%.o: ../%.c
+	clang $(CFLAGS) -nostdlib -ffunction-sections -fdata-sections -Os -o $@ $<
+	objcopy --prefix-symbols=rose_ $@
+
+clean:
+	rm -f $(objects) $(rose_objects) test
diff --git a/host_test/main.c b/host_test/main.c
new file mode 100644
index 0000000..d610ab9
--- /dev/null
+++ b/host_test/main.c
@@ -0,0 +1,14 @@
+#include <stdio.h>
+
+#include "str_test.c"
+
+int main() {
+	int fail = 0;
+	puts("\t--- Host tests for ROSE ---\n");
+
+	fail |= do_str_test();
+
+	puts(fail ? "\n\t--- ROSE FAILED TEST ---" : "\n\t--- ROSE PASSED TEST ---");
+	return fail;
+}
+
diff --git a/host_test/str_test.c b/host_test/str_test.c
new file mode 100644
index 0000000..6ef643e
--- /dev/null
+++ b/host_test/str_test.c
@@ -0,0 +1,312 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/random.h>
+
+int rose_memcmp(const void *s1, const void *s2, size_t n);
+
+const char *MEMCMP_S0 = "0000000000000000";
+const char *MEMCMP_S1 = "0000000000000001";
+const char *MEMCMP_S2 = "0000000000000002";
+int test_memcmp() {
+	int res, test, fail = 0;
+
+	res = rose_memcmp(MEMCMP_S0, MEMCMP_S0, 0);
+	test = res == 0;
+	printf("%s\tmemcmp n=0\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcmp(MEMCMP_S0, MEMCMP_S1, 1);
+	test = res == 0;
+	printf("%s\tmemcmp n=1, strings equal\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcmp(MEMCMP_S0, MEMCMP_S1, 2);
+	test = res == 0;
+	printf("%s\tmemcmp n=2, strings equal\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcmp(MEMCMP_S0, MEMCMP_S1, 3);
+	test = res == 0;
+	printf("%s\tmemcmp n=3, strings equal\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcmp(MEMCMP_S0, MEMCMP_S1, 4);
+	test = res == 0;
+	printf("%s\tmemcmp n=4, strings equal\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcmp(MEMCMP_S0, MEMCMP_S1, 15);
+	test = res == 0;
+	printf("%s\tmemcmp n=15, strings equal\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcmp(MEMCMP_S0, MEMCMP_S1, 16);
+	test = res == -1;
+	printf("%s\tmemcmp n=16, first less\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcmp(MEMCMP_S2, MEMCMP_S1, 16);
+	test = res == 1;
+	printf("%s\tmemcmp n=16, first greater\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcmp(&MEMCMP_S1[1], &MEMCMP_S2[1], 1000000);
+	test = res == -1;
+	printf("%s\tmemcmp n=1000000, first less\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcmp(&MEMCMP_S2[1], &MEMCMP_S1[1], 1000000);
+	test = res == 1;
+	printf("%s\tmemcmp n=1000000, first greater\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcmp("\x01", "\xff", 1); // byte-tail path
+	test = res < 0;
+	printf("%s\tmemcmp n=1, bytes compared as unsigned\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcmp("000\x01", "000\xff", 4); // dword path
+	test = res < 0;
+	printf("%s\tmemcmp n=4, bytes compared as unsigned\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	return fail;
+}
+
+
+int rose_strcmp(const void *s1, const void *s2);
+
+const char *STRCMP_S0 = "0000000000000000";
+const char *STRCMP_S1 = "0000000000000001";
+const char *STRCMP_S2 = "0000000000000002";
+int test_strcmp() {
+	int res, test, fail = 0;
+
+	res = rose_strcmp(STRCMP_S0, STRCMP_S0);
+	test = res == 0;
+	printf("%s\tstrcmp, strings equal\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_strcmp(&STRCMP_S0[1], STRCMP_S0);
+	test = res == -'0';
+	printf("%s\tstrcmp, first shorter\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_strcmp(STRCMP_S0, &STRCMP_S0[1]);
+	test = res == '0';
+	printf("%s\tstrcmp, second shorter\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_strcmp(STRCMP_S0, STRCMP_S1);
+	test = res == -1;
+	printf("%s\tstrcmp, first less\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_strcmp(STRCMP_S2, STRCMP_S1);
+	test = res == 1;
+	printf("%s\tstrcmp, first greater\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	char n = 0;
+	res = rose_strcmp(&n, &n);
+	test = res == 0;
+	printf("%s\tstrcmp, empty strings\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_strcmp("\x01", "\xff");
+	test = res < 0;
+	printf("%s\tstrcmp, bytes compared as unsigned\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	return fail;
+}
+
+/* Thanks Plasma Ofthedawn! */
+size_t rose_strlen(const char *s);
+
+const char STRLEN_S0[] = {'a', 'b', 'c', 'd', 'e', '\0'};
+const char STRLEN_S1[] = {'\0', 'a', 'b', 'c', '\0'};
+const char STRLEN_S2[] = {'\1', '\xff', '\0'};
+
+int test_strlen() {
+	int res, test, fail = 0;
+
+	res = rose_strlen(STRLEN_S0);
+	test = res == 5;
+	printf("%s\tstrlen, normal string. is %d, should be 5\n", test ? "PASS" : "FAIL", res);
+	fail |= !test;
+
+	res = rose_strlen(STRLEN_S1);
+	test = res == 0;
+	printf("%s\tstrlen, length 0. is %d, should be 0\n", test ? "PASS" : "FAIL", res);
+	fail |= !test;
+
+	res = rose_strlen(STRLEN_S2);
+	test = res == 2;
+	printf("%s\tstrlen, weird characters. is %d, should be 2\n", test ? "PASS" : "FAIL", res);
+	fail |= !test;
+
+	return fail;
+}
+
+void *rose_memcpy(void *dest, const void *src, size_t n);
+
+int test_memcpy() {
+	void *res;
+	int test, fail = 0;
+
+	const size_t n = 1 << 20;
+	char *d = malloc(n);
+	char *s = malloc(n);
+
+	getrandom(s, n, 0);
+
+	s[15] = 0;
+	d[15] = 1;
+
+	res = rose_memcpy(d, s, 15);
+	test = res == d && d[15] == 1 && memcmp(s, d, 15) == 0;
+	printf("%s\tmemcpy, length 15\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memcpy(d, s, n);
+	test = res == d && memcmp(s, d, n) == 0;
+	printf("%s\tmemcpy, length %zu\n", test ? "PASS" : "FAIL", n);
+	fail |= !test;
+
+	d[0] = ~s[0]; // always differs from s[0], whatever the random byte is
+	res = rose_memcpy(d, s, 0);
+	test = res == d && d[0] != s[0];
+	printf("%s\tmemcpy, length 0\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	free(d);
+	free(s);
+
+	return fail;
+}
+
+char *rose_strcpy(char *restrict dst, const char *restrict src);
+
+int test_strcpy() {
+	void *res;
+	int test, fail = 0;
+
+	const size_t n = 1 << 16;
+	char *d = malloc(n);
+	char *s = malloc(n);
+
+	getrandom(s, n, 0);
+	for (int i = 1; i < 256; i++) s[i - 1] = i;
+	s[n-1] = 0; // fix very unlikely case
+
+	res = rose_strcpy(d, s);
+	test = res == d && strcmp(s, d) == 0;
+	printf("%s\tstrcpy, random string\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	d[0] = 1;
+	s[0] = 0;
+	res = rose_strcpy(d, s);
+	test = res == d && d[0] == 0;
+	printf("%s\tstrcpy, length 0\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	free(d);
+	free(s);
+
+	return fail;
+}
+
+void *rose_memset(void *s, int c, size_t n);
+
+int test_memset() {
+	void *res;
+	int test, fail = 0;
+
+	const size_t n = 1 << 16;
+	char *s = malloc(n);
+	char *d = malloc(n);
+	const int c = 0x42;
+	memset(d, c, n);
+
+	res = rose_memset(s, c, n);
+	test = res == s && memcmp(s, d, n) == 0;
+	printf("%s\tmemset, length %zu\n", test ? "PASS" : "FAIL", n);
+	fail |= !test;
+
+	res = rose_memset(s, 0, 1);
+	test = res == s && s[0] == 0 && s[1] == c;
+	printf("%s\tmemset, bound check\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	res = rose_memset(s, c, 0);
+	test = res == s && s[0] == 0;
+	printf("%s\tmemset, length 0\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	free(s);
+	free(d);
+
+	return fail;
+}
+
+char *rose_strncpy(char *restrict dst, const char *restrict src, size_t dsize);
+
+int test_strncpy() {
+	void *res;
+	int test, fail = 0;
+
+	const size_t n = 1 << 16;
+	char *d = malloc(n);
+	char *s = malloc(n);
+	char *x = malloc(n);
+
+	getrandom(s, n, 0);
+	for (int i = 1; i < 256; i++) s[i - 1] = i;
+
+	strncpy(x, s, n);
+	res = rose_strncpy(d, s, n);
+	test = res == d && memcmp(x, d, n) == 0;
+	printf("%s\tstrncpy, random string, length %zu\n", test ? "PASS" : "FAIL", n);
+	fail |= !test;
+
+	memset(s, 0x42, n);
+	strncpy(x, s, 255);
+	x[255] = 0;
+	d[255] = 1;
+	res = rose_strncpy(d, s, 255);
+	test = res == d && memcmp(x, d, 256) < 0; // only the sign of memcmp is specified
+	printf("%s\tstrncpy, bounds check\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	d[0] = 1;
+	res = rose_strncpy(d, s, 0);
+	test = res == d && d[0] == 1;
+	printf("%s\tstrncpy, length 0\n", test ? "PASS" : "FAIL");
+	fail |= !test;
+
+	free(d);
+	free(s);
+	free(x);
+
+	return fail;
+}
+
+int do_str_test() {
+	int fail = 0;
+
+	puts("[String Tests]");
+
+	fail |= test_memcmp();
+	fail |= test_strcmp();
+	fail |= test_strlen();
+	fail |= test_memcpy();
+	fail |= test_strcpy();
+	fail |= test_memset();
+	fail |= test_strncpy();
+	puts(fail ? "STRING TESTS FAILED" : "STRING TESTS PASSED");
+
+	return fail;
+}
diff --git a/include/string.h b/include/string.h
new file mode 100644
index 0000000..abba9c2
--- /dev/null
+++ b/include/string.h
@@ -0,0 +1,11 @@
+#pragma once
+#include <stdint.h>
+
+int memcmp(const void *s1, const void *s2, uint32_t n);
+void *memcpy(void *dest, const void *src, uint32_t n);
+void *memset(void *s, int c, uint32_t n);
+int strcmp(const char *s1, const char *s2);
+char *stpcpy(char *restrict dst, const char *restrict src);
+char *strcpy(char *restrict dst, const char *restrict src);
+uint32_t strlen(const char *s);
+char *strncpy(char *restrict dst, const char *restrict src, uint32_t dsize);
diff --git a/string.nasm b/string.nasm
new file mode 100644
index 0000000..4ade6c5
--- /dev/null
+++ b/string.nasm
@@ -0,0 +1,205 @@
+; C string.h implementations
+
+global memcmp
+global memcpy
+global memset
+global strcmp
+global stpcpy
+global strcpy
+global strlen
+global strncpy
+
+; return: eax(, edx) | parameters: stack | scratch: eax ecx edx | preserved: ebx esi edi ebp esp
+
+; int memcmp(const void s1[], const void s2[], size_t n);
+memcmp:
+	mov ecx, [esp+12]
+	jecxz .z ; return 0 if n is 0
+	push esi
+	push edi
+	mov esi, [esp+12]
+	mov edi, [esp+16]
+	mov edx, ecx
+	shr ecx, 2
+	jz .ld
+	repe cmpsd ; find non-matching dwords
+	jne .f2
+.ld:
+	and edx, 3 ; 0-3 tail bytes left
+	jz .sz ; none left, so all matched
+.b: ; compare EDX bytes at ESI <-> EDI
+	movzx eax, byte [esi]
+	movzx ecx, byte [edi]
+	sub eax, ecx ; 32-bit diff of zero-extended bytes keeps the right sign
+	jnz .f
+	inc esi
+	inc edi
+	dec edx
+	jnz .b
+.sz: ; no non-matching bytes
+	pop edi
+	pop esi
+.z:
+	xor eax, eax
+	ret
+.f: ; diff in eax
+	pop edi
+	pop esi
+	ret
+.f2: ; found non-matching dword at ESI-4 <-> EDI-4
+	sub esi, 4
+	sub edi, 4
+	mov edx, 4 ; the differing byte is one of these four
+	jmp .b
+
+; void *memcpy(void dest[], const void src[], size_t n);
+memcpy:
+	mov ecx, [esp+12]
+	jecxz .none
+	push esi
+	push edi
+	mov edi, [esp+12]
+	mov esi, [esp+16]
+	mov edx, ecx
+	shr ecx, 2
+	jz .dword_done
+	rep movsd
+.dword_done:
+	test edx, 2
+	jz .word_done
+	movsw
+.word_done:
+	test edx, 1
+	jz .done
+	movsb
+.done:
+	pop edi
+	pop esi
+.none:
+	mov eax, [esp+4]
+	ret
+
+; void *memset(void s[], int c, size_t n);
+memset:
+	mov ecx, [esp+12]
+	jecxz .none
+	push edi
+	mov edi, [esp+8]
+	mov eax, [esp+12]
+	mov ah, al
+	mov edx, eax
+	shl eax, 16
+	mov ax, dx ; eax = c replicated into all four bytes
+	mov edx, ecx
+	shr ecx, 2
+	jz .dword_done
+	rep stosd
+.dword_done:
+	test edx, 2
+	jz .word_done
+	stosw
+.word_done:
+	test edx, 1
+	jz .done
+	stosb
+.done:
+	pop edi
+.none:
+	mov eax, [esp+4]
+	ret
+
+; int strcmp(const char *s1, const char *s2);
+strcmp:
+	push esi
+	push edi
+	mov esi, [esp+12]
+	mov edi, [esp+16]
+	xor ecx, ecx
+.l:
+	movzx eax, byte [esi+ecx]
+	movzx edx, byte [edi+ecx]
+	inc ecx
+	sub eax, edx ; 32-bit diff of zero-extended bytes keeps the right sign
+	jnz .done ; bytes differ (also when only one string has ended)
+	test edx, edx
+	jnz .l ; equal and not NUL, keep going
+.done: ; eax = difference, 0 if both ended together
+	pop edi
+	pop esi
+	ret
+
+; char *stpcpy(char *restrict dst, const char *restrict src);
+stpcpy:
+	push esi
+	push edi
+	mov edi, [esp+12]
+	mov esi, [esp+16]
+	xor ecx, ecx
+.l:
+	xor eax, eax
+	or al, [esi+ecx]
+	mov [edi+ecx], al ; mov leaves the flags from OR intact
+	jz .done
+	inc ecx
+	jmp .l
+.done:
+	lea eax, [edi+ecx]
+	pop edi
+	pop esi
+	ret
+; char *strcpy(char *restrict dst, const char *restrict src);
+strcpy:
+	push esi
+	push edi
+	mov edi, [esp+12]
+	mov esi, [esp+16]
+	xor ecx, ecx
+.l:
+	xor eax, eax
+	or al, [esi+ecx]
+	mov [edi+ecx], al ; mov leaves the flags from OR intact
+	jz .done
+	inc ecx
+	jmp .l
+.done:
+	mov eax, [esp+12]
+	pop edi
+	pop esi
+	ret
+
+; size_t strlen(const char *s);
+strlen:
+	push edi
+	mov edi, [esp+8]
+	xor eax, eax
+	mov ecx, -1
+	repne scasb ; find AL in EDI
+	not ecx
+	lea eax, [ecx-1]
+	pop edi
+	ret
+
+; char *strncpy(char dst[restrict .dsize], const char *restrict src, size_t dsize);
+strncpy:
+	push esi
+	push edi
+	mov edi, [esp+12]
+	mov esi, [esp+16]
+	mov ecx, [esp+20]
+	jecxz .done
+.l:
+	xor eax, eax
+	or al, [esi]
+	jz .src_empty
+	stosb
+	inc esi
+	loop .l
+	jmp .done
+.src_empty:
+	rep stosb ; zero-fill the rest of dst (AL is 0 here)
+.done:
+	mov eax, [esp+12]
+	pop edi
+	pop esi
+	ret
+