diff --git a/Makefile b/Makefile
index 9afc600..73d1b41 100644
--- a/Makefile
+++ b/Makefile
@@ -3,8 +3,7 @@
 SRCS = $(filter %.c,$(FILES))
 BINS = $(SRCS:%.c=%.elf)
 MUSLFILES = $(wildcard ./musl/*)
-MUSLSRCS = $(filter %.s,$(MUSLFILES))
-MUSLOBJS = $(MUSLSRCS:%.s=%.o)
+MUSLOBJS = $(filter %.o,$(MUSLFILES))
 
 CFLAGS = -Wall -Werror -pedantic -Wno-unused -std=gnu2y\
 	-O3 -c -g -fno-stack-protector -fno-pie\
diff --git a/day5.c b/day5.c
index 6768d31..fa6e21f 100644
--- a/day5.c
+++ b/day5.c
@@ -1,4 +1,5 @@
 #include "lib.h"
+#include <stdlib.h>
 
 #define DBG 0
 #define PRETTY 1
@@ -38,6 +39,15 @@ struct line {
 alignas(0x40)
 static struct line ranges[256];
 
+static
+int linecmp(const void *av, const void *bv) {
+	const struct line *a = av;
+	const struct line *b = bv;
+	if (a->sv < b->sv) return -1;
+	else if (a->sv > b->sv) return 1;
+	else return 0;
+}
+
 static
 unsigned long do_part1(size_t file_len, unsigned char file[file_len]) {
 	unsigned char *s = file;
@@ -66,13 +76,13 @@ unsigned long do_part1(size_t file_len, unsigned char file[file_len]) {
 		s++;
 		d++;
 	} while (*s != '\n');
-	d->sv = 0;
+
 #if PRETTY
-	d->pad0[0] = 'F';
-	d->pad0[1] = 'I';
-	d->pad0[2] = 'N';
-	d->pad0[3] = 'I';
+	size_t range_cnt = d - ranges;
+	qsort(ranges, range_cnt, sizeof(*ranges), linecmp);
 #endif
+
+	d->sv = 0;
 	s++;
 
 	do {
@@ -108,32 +118,32 @@ unsigned long do_part2(size_t file_len, unsigned char file[file_len]) {
 		unsigned char *tmp;
 		d->sv = grabbcd(s, &tmp);
 #if PRETTY
-		snprintd(d->sv, sizeof(d->s), d->s);
+		snprinth(d->sv, sizeof(d->s), d->s);
 #endif
 		s = tmp;
 		s++;
 		d->ev = grabbcd(s, &tmp);
 #if PRETTY
-		snprintd(d->ev, sizeof(d->e), d->e);
+		snprinth(d->ev, sizeof(d->e), d->e);
 #endif
 		s = tmp;
 		s++;
 		d++;
 	} while (*s != '\n');
-	d->sv = 0;
+
 #if PRETTY
-	d->pad0[0] = 'F';
-	d->pad0[1] = 'I';
-	d->pad0[2] = 'N';
-	d->pad0[3] = 'I';
+	size_t range_cnt = d - ranges;
+	qsort(ranges, range_cnt, sizeof(*ranges), linecmp);
 #endif
+	d->sv = 0;
+
+
 outer:
 	for (struct line *di = ranges; di->sv; di++) {
 		bcdint dis = di->sv;
 		bcdint die = di->ev;
 		if (dis == -1) continue;
-		for (struct line *dj = ranges; dj->sv; dj++) {
-			if (dj == di) continue;
+		for (struct line *dj = di+1; dj->sv; dj++) {
 			bcdint djs = dj->sv;
 			bcdint dje = dj->ev;
 			if (djs == -1) continue;
diff --git a/lib.h b/lib.h
index 0101e4c..7aae8b7 100644
--- a/lib.h
+++ b/lib.h
@@ -140,7 +140,6 @@ void snprinth(unsigned long v, size_t n, unsigned char d[n]) {
 		v >>= 4;
 		if (p == d) return;
 	}
-	while (p >= d) *p-- = 0;
 }
 
 static
diff --git a/musl/atomic.h b/musl/atomic.h
new file mode 100644
index 0000000..8f71c8c
--- /dev/null
+++ b/musl/atomic.h
@@ -0,0 +1,333 @@
+#ifndef _ATOMIC_H
+#define _ATOMIC_H
+
+#include <stdint.h>
+
+#include "atomic_arch.h"
+
+#ifdef a_ll
+
+#ifndef a_pre_llsc
+#define a_pre_llsc()
+#endif
+
+#ifndef a_post_llsc
+#define a_post_llsc()
+#endif
+
+#ifndef a_cas
+#define a_cas a_cas
+static inline int a_cas(volatile int *p, int t, int s)
+{
+	int old;
+	a_pre_llsc();
+	do old = a_ll(p);
+	while (old==t && !a_sc(p, s));
+	a_post_llsc();
+	return old;
+}
+#endif
+
+#ifndef a_swap
+#define a_swap a_swap
+static inline int a_swap(volatile int *p, int v)
+{
+	int old;
+	a_pre_llsc();
+	do old = a_ll(p);
+	while (!a_sc(p, v));
+	a_post_llsc();
+	return old;
+}
+#endif
+
+#ifndef a_fetch_add
+#define a_fetch_add a_fetch_add
+static inline int a_fetch_add(volatile int *p, int v)
+{
+	int old;
+	a_pre_llsc();
+	do old = a_ll(p);
+	while (!a_sc(p, (unsigned)old + v));
+	a_post_llsc();
+	return old;
+}
+#endif
+
+#ifndef a_fetch_and
+#define a_fetch_and a_fetch_and
+static inline int a_fetch_and(volatile int *p, int v)
+{
+	int old;
+	a_pre_llsc();
+	do old = a_ll(p);
+	while (!a_sc(p, old & v));
+	a_post_llsc();
+	return old;
+}
+#endif
+
+#ifndef a_fetch_or
+#define a_fetch_or a_fetch_or
+static inline int a_fetch_or(volatile int *p, int v)
+{
+	int old;
+	a_pre_llsc();
+	do old = a_ll(p);
+	while (!a_sc(p, old | v));
+	a_post_llsc();
+	return old;
+}
+#endif
+
+#endif
+
+#ifdef a_ll_p
+
+#ifndef a_cas_p
+#define a_cas_p a_cas_p
+static inline void *a_cas_p(volatile void *p, void *t, void *s)
+{
+	void *old;
+	a_pre_llsc();
+	do old = a_ll_p(p);
+	while (old==t && !a_sc_p(p, s));
+	a_post_llsc();
+	return old;
+}
+#endif
+
+#endif
+
+#ifndef a_cas
+#error missing definition of a_cas
+#endif
+
+#ifndef a_swap
+#define a_swap a_swap
+static inline int a_swap(volatile int *p, int v)
+{
+	int old;
+	do old = *p;
+	while (a_cas(p, old, v) != old);
+	return old;
+}
+#endif
+
+#ifndef a_fetch_add
+#define a_fetch_add a_fetch_add
+static inline int a_fetch_add(volatile int *p, int v)
+{
+	int old;
+	do old = *p;
+	while (a_cas(p, old, (unsigned)old+v) != old);
+	return old;
+}
+#endif
+
+#ifndef a_fetch_and
+#define a_fetch_and a_fetch_and
+static inline int a_fetch_and(volatile int *p, int v)
+{
+	int old;
+	do old = *p;
+	while (a_cas(p, old, old&v) != old);
+	return old;
+}
+#endif
+#ifndef a_fetch_or
+#define a_fetch_or a_fetch_or
+static inline int a_fetch_or(volatile int *p, int v)
+{
+	int old;
+	do old = *p;
+	while (a_cas(p, old, old|v) != old);
+	return old;
+}
+#endif
+
+#ifndef a_and
+#define a_and a_and
+static inline void a_and(volatile int *p, int v)
+{
+	a_fetch_and(p, v);
+}
+#endif
+
+#ifndef a_or
+#define a_or a_or
+static inline void a_or(volatile int *p, int v)
+{
+	a_fetch_or(p, v);
+}
+#endif
+
+#ifndef a_inc
+#define a_inc a_inc
+static inline void a_inc(volatile int *p)
+{
+	a_fetch_add(p, 1);
+}
+#endif
+
+#ifndef a_dec
+#define a_dec a_dec
+static inline void a_dec(volatile int *p)
+{
+	a_fetch_add(p, -1);
+}
+#endif
+
+#ifndef a_store
+#define a_store a_store
+static inline void a_store(volatile int *p, int v)
+{
+#ifdef a_barrier
+	a_barrier();
+	*p = v;
+	a_barrier();
+#else
+	a_swap(p, v);
+#endif
+}
+#endif
+
+#ifndef a_barrier
+#define a_barrier a_barrier
+static inline void a_barrier()
+{
+	volatile int tmp = 0;
+	a_cas(&tmp, 0, 0);
+}
+#endif
+
+#ifndef a_spin
+#define a_spin a_barrier
+#endif
+
+#ifndef a_and_64
+#define a_and_64 a_and_64
+static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+{
+	union { uint64_t v; uint32_t r[2]; } u = { v };
+	if (u.r[0]+1) a_and((int *)p, u.r[0]);
+	if (u.r[1]+1) a_and((int *)p+1, u.r[1]);
+}
+#endif
+
+#ifndef a_or_64
+#define a_or_64 a_or_64
+static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+{
+	union { uint64_t v; uint32_t r[2]; } u = { v };
+	if (u.r[0]) a_or((int *)p, u.r[0]);
+	if (u.r[1]) a_or((int *)p+1, u.r[1]);
+}
+#endif
+
+#ifndef a_cas_p
+typedef char a_cas_p_undefined_but_pointer_not_32bit[-sizeof(char) == 0xffffffff ? 1 : -1];
+#define a_cas_p a_cas_p
+static inline void *a_cas_p(volatile void *p, void *t, void *s)
+{
+	return (void *)a_cas((volatile int *)p, (int)t, (int)s);
+}
+#endif
+
+#ifndef a_or_l
+#define a_or_l a_or_l
+static inline void a_or_l(volatile void *p, long v)
+{
+	if (sizeof(long) == sizeof(int)) a_or(p, v);
+	else a_or_64(p, v);
+}
+#endif
+
+#ifndef a_crash
+#define a_crash a_crash
+static inline void a_crash()
+{
+	*(volatile char *)0=0;
+}
+#endif
+
+#ifndef a_ctz_32
+#define a_ctz_32 a_ctz_32
+static inline int a_ctz_32(uint32_t x)
+{
+#ifdef a_clz_32
+	return 31-a_clz_32(x&-x);
+#else
+	static const char debruijn32[32] = {
+		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
+		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
+	};
+	return debruijn32[(x&-x)*0x076be629 >> 27];
+#endif
+}
+#endif
+
+#ifndef a_ctz_64
+#define a_ctz_64 a_ctz_64
+static inline int a_ctz_64(uint64_t x)
+{
+	static const char debruijn64[64] = {
+		0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
+		62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
+		63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
+		51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
+	};
+	if (sizeof(long) < 8) {
+		uint32_t y = x;
+		if (!y) {
+			y = x>>32;
+			return 32 + a_ctz_32(y);
+		}
+		return a_ctz_32(y);
+	}
+	return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
+}
+#endif
+
+static inline int a_ctz_l(unsigned long x)
+{
+	return (sizeof(long) < 8) ? a_ctz_32(x) : a_ctz_64(x);
+}
+
+#ifndef a_clz_64
+#define a_clz_64 a_clz_64
+static inline int a_clz_64(uint64_t x)
+{
+#ifdef a_clz_32
+	if (x>>32)
+		return a_clz_32(x>>32);
+	return a_clz_32(x) + 32;
+#else
+	uint32_t y;
+	int r;
+	if (x>>32) y=x>>32, r=0; else y=x, r=32;
+	if (y>>16) y>>=16; else r |= 16;
+	if (y>>8) y>>=8; else r |= 8;
+	if (y>>4) y>>=4; else r |= 4;
+	if (y>>2) y>>=2; else r |= 2;
+	return r | !(y>>1);
+#endif
+}
+#endif
+
+#ifndef a_clz_32
+#define a_clz_32 a_clz_32
+static inline int a_clz_32(uint32_t x)
+{
+	x >>= 1;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	x++;
+	return 31-a_ctz_32(x);
+}
+#endif
+
+#endif
diff --git a/musl/atomic_arch.h b/musl/atomic_arch.h
new file mode 100644
index 0000000..da4e203
--- /dev/null
+++ b/musl/atomic_arch.h
@@ -0,0 +1,123 @@
+#define a_cas a_cas
+static inline int a_cas(volatile int *p, int t, int s)
+{
+	__asm__ __volatile__ (
+		"lock ; cmpxchg %3, %1"
+		: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
+	return t;
+}
+
+#define a_cas_p a_cas_p
+static inline void *a_cas_p(volatile void *p, void *t, void *s)
+{
+	__asm__( "lock ; cmpxchg %3, %1"
+		: "=a"(t), "=m"(*(void *volatile *)p)
+		: "a"(t), "r"(s) : "memory" );
+	return t;
+}
+
+#define a_swap a_swap
+static inline int a_swap(volatile int *p, int v)
+{
+	__asm__ __volatile__(
+		"xchg %0, %1"
+		: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
+	return v;
+}
+
+#define a_fetch_add a_fetch_add
+static inline int a_fetch_add(volatile int *p, int v)
+{
+	__asm__ __volatile__(
+		"lock ; xadd %0, %1"
+		: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
+	return v;
+}
+
+#define a_and a_and
+static inline void a_and(volatile int *p, int v)
+{
+	__asm__ __volatile__(
+		"lock ; and %1, %0"
+		: "=m"(*p) : "r"(v) : "memory" );
+}
+
+#define a_or a_or
+static inline void a_or(volatile int *p, int v)
+{
+	__asm__ __volatile__(
+		"lock ; or %1, %0"
+		: "=m"(*p) : "r"(v) : "memory" );
+}
+
+#define a_and_64 a_and_64
+static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+{
+	__asm__ __volatile__(
+		"lock ; and %1, %0"
+		: "=m"(*p) : "r"(v) : "memory" );
+}
+
+#define a_or_64 a_or_64
+static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+{
+	__asm__ __volatile__(
+		"lock ; or %1, %0"
+		: "=m"(*p) : "r"(v) : "memory" );
+}
+
+#define a_inc a_inc
+static inline void a_inc(volatile int *p)
+{
+	__asm__ __volatile__(
+		"lock ; incl %0"
+		: "=m"(*p) : "m"(*p) : "memory" );
+}
+
+#define a_dec a_dec
+static inline void a_dec(volatile int *p)
+{
+	__asm__ __volatile__(
+		"lock ; decl %0"
+		: "=m"(*p) : "m"(*p) : "memory" );
+}
+
+#define a_store a_store
+static inline void a_store(volatile int *p, int x)
+{
+	__asm__ __volatile__(
+		"mov %1, %0 ; lock ; orl $0,(%%rsp)"
+		: "=m"(*p) : "r"(x) : "memory" );
+}
+
+#define a_barrier a_barrier
+static inline void a_barrier()
+{
+	__asm__ __volatile__( "" : : : "memory" );
+}
+
+#define a_spin a_spin
+static inline void a_spin()
+{
+	__asm__ __volatile__( "pause" : : : "memory" );
+}
+
+#define a_crash a_crash
+static inline void a_crash()
+{
+	__asm__ __volatile__( "hlt" : : : "memory" );
+}
+
+#define a_ctz_64 a_ctz_64
+static inline int a_ctz_64(uint64_t x)
+{
+	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
+	return x;
+}
+
+#define a_clz_64 a_clz_64
+static inline int a_clz_64(uint64_t x)
+{
+	__asm__( "bsr %1,%0 ; xor $63,%0" : "=r"(x) : "r"(x) );
+	return x;
+}
diff --git a/musl/qsort.c b/musl/qsort.c
new file mode 100644
index 0000000..02555c2
--- /dev/null
+++ b/musl/qsort.c
@@ -0,0 +1,219 @@
+/* Copyright (C) 2011 by Valentin Ochs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* Minor changes by Rich Felker for integration in musl, 2011-04-27. */
+
+/* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1).
+   Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "atomic.h"
+#define ntz(x) a_ctz_l((x))
+
+typedef int (*cmpfun)(const void *, const void *, void *);
+
+static inline int pntz(size_t p[2]) {
+	int r = ntz(p[0] - 1);
+	if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) {
+		return r;
+	}
+	return 0;
+}
+
+static void cycle(size_t width, unsigned char* ar[], int n)
+{
+	unsigned char tmp[256];
+	size_t l;
+	int i;
+
+	if(n < 2) {
+		return;
+	}
+
+	ar[n] = tmp;
+	while(width) {
+		l = sizeof(tmp) < width ? sizeof(tmp) : width;
+		memcpy(ar[n], ar[0], l);
+		for(i = 0; i < n; i++) {
+			memcpy(ar[i], ar[i + 1], l);
+			ar[i] += l;
+		}
+		width -= l;
+	}
+	ar[n] = 0;
+}
+
+/* shl() and shr() need n > 0 */
+static inline void shl(size_t p[2], int n)
+{
+	if(n >= 8 * sizeof(size_t)) {
+		n -= 8 * sizeof(size_t);
+		p[1] = p[0];
+		p[0] = 0;
+	}
+	p[1] <<= n;
+	p[1] |= p[0] >> (sizeof(size_t) * 8 - n);
+	p[0] <<= n;
+}
+
+static inline void shr(size_t p[2], int n)
+{
+	if(n >= 8 * sizeof(size_t)) {
+		n -= 8 * sizeof(size_t);
+		p[0] = p[1];
+		p[1] = 0;
+	}
+	p[0] >>= n;
+	p[0] |= p[1] << (sizeof(size_t) * 8 - n);
+	p[1] >>= n;
+}
+
+static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int pshift, size_t lp[])
+{
+	unsigned char *rt, *lf;
+	unsigned char *ar[14 * sizeof(size_t) + 1];
+	int i = 1;
+
+	ar[0] = head;
+	while(pshift > 1) {
+		rt = head - width;
+		lf = head - width - lp[pshift - 2];
+
+		if(cmp(ar[0], lf, arg) >= 0 && cmp(ar[0], rt, arg) >= 0) {
+			break;
+		}
+		if(cmp(lf, rt, arg) >= 0) {
+			ar[i++] = lf;
+			head = lf;
+			pshift -= 1;
+		} else {
+			ar[i++] = rt;
+			head = rt;
+			pshift -= 2;
+		}
+	}
+	cycle(width, ar, i);
+}
+
+static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[])
+{
+	unsigned char *stepson,
+	              *rt, *lf;
+	size_t p[2];
+	unsigned char *ar[14 * sizeof(size_t) + 1];
+	int i = 1;
+	int trail;
+
+	p[0] = pp[0];
+	p[1] = pp[1];
+
+	ar[0] = head;
+	while(p[0] != 1 || p[1] != 0) {
+		stepson = head - lp[pshift];
+		if(cmp(stepson, ar[0], arg) <= 0) {
+			break;
+		}
+		if(!trusty && pshift > 1) {
+			rt = head - width;
+			lf = head - width - lp[pshift - 2];
+			if(cmp(rt, stepson, arg) >= 0 || cmp(lf, stepson, arg) >= 0) {
+				break;
+			}
+		}
+
+		ar[i++] = stepson;
+		head = stepson;
+		trail = pntz(p);
+		shr(p, trail);
+		pshift += trail;
+		trusty = 0;
+	}
+	if(!trusty) {
+		cycle(width, ar, i);
+		sift(head, width, cmp, arg, pshift, lp);
+	}
+}
+
+void qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
+{
+	size_t lp[12*sizeof(size_t)];
+	size_t i, size = width * nel;
+	unsigned char *head, *high;
+	size_t p[2] = {1, 0};
+	int pshift = 1;
+	int trail;
+
+	if (!size) return;
+
+	head = base;
+	high = head + size - width;
+
+	/* Precompute Leonardo numbers, scaled by element width */
+	for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++);
+
+	while(head < high) {
+		if((p[0] & 3) == 3) {
+			sift(head, width, cmp, arg, pshift, lp);
+			shr(p, 2);
+			pshift += 2;
+		} else {
+			if(lp[pshift - 1] >= high - head) {
+				trinkle(head, width, cmp, arg, p, pshift, 0, lp);
+			} else {
+				sift(head, width, cmp, arg, pshift, lp);
+			}
+
+			if(pshift == 1) {
+				shl(p, 1);
+				pshift = 0;
+			} else {
+				shl(p, pshift - 1);
+				pshift = 1;
+			}
+		}
+
+		p[0] |= 1;
+		head += width;
+	}
+
+	trinkle(head, width, cmp, arg, p, pshift, 0, lp);
+
+	while(pshift != 1 || p[0] != 1 || p[1] != 0) {
+		if(pshift <= 1) {
+			trail = pntz(p);
+			shr(p, trail);
+			pshift += trail;
+		} else {
+			shl(p, 2);
+			pshift -= 2;
+			p[0] ^= 7;
+			shr(p, 1);
+			trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp);
+			shl(p, 1);
+			p[0] |= 1;
+			trinkle(head - width, width, cmp, arg, p, pshift, 1, lp);
+		}
+		head -= width;
+	}
+}
diff --git a/musl/qsort_nr.c b/musl/qsort_nr.c
new file mode 100644
index 0000000..d6172a0
--- /dev/null
+++ b/musl/qsort_nr.c
@@ -0,0 +1,16 @@
+#include <stdlib.h>
+
+typedef int (*cmpfun)(const void *, const void *);
+typedef int (*cmpfun_r)(const void *, const void *, cmpfun);
+void qsort_r(void *base, size_t nel, size_t width, cmpfun_r cmp, cmpfun arg);
+
+
+static int wrapper_cmp(const void *v1, const void *v2, cmpfun cmp)
+{
+	return cmp(v1, v2);
+}
+
+void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
+{
+	qsort_r(base, nel, width, wrapper_cmp, cmp);
+}
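
Note: the heart of this change is replacing the 'FINI' sentinel padding in day5.c with a real sort of the ranges table, which is what lets the part-2 inner loop start at di+1 instead of rescanning from the head of the table. Below is a minimal standalone sketch of that comparator pattern; it builds against a hosted <stdlib.h> qsort rather than the vendored musl one, and struct range / rangecmp are illustrative names, not identifiers from this repo.

#include <stdio.h>
#include <stdlib.h>

struct range { unsigned long sv, ev; };

/* Same shape as linecmp in the diff: compare rather than subtract, so
 * large unsigned values cannot overflow or truncate when coerced to int. */
static int rangecmp(const void *av, const void *bv)
{
	const struct range *a = av, *b = bv;
	return (a->sv > b->sv) - (a->sv < b->sv);
}

int main(void)
{
	struct range r[] = { {30, 40}, {10, 25}, {20, 35} };
	size_t n = sizeof r / sizeof r[0];

	qsort(r, n, sizeof r[0], rangecmp);

	/* Once sorted by sv, any range that starts before r[i] has already
	 * been visited, so an overlap scan only needs to look forward. */
	for (size_t i = 0; i < n; i++)
		printf("[%lu, %lu]\n", r[i].sv, r[i].ev);
	return 0;
}

The vendored qsort_nr.c relies on the same comparator plumbing from the other direction: qsort_r threads a context argument through to its three-argument comparator, and the plain qsort entry point reuses that path by passing the caller's two-argument comparator in as the context.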