Skip to content

Commit 5ae9008

Browse files
committed
add tlist, aka treap list
a list optimized for fast insertion and deletion at arbitrary positions, while also being reasonably fast at indexing - all operations complete roughly in O(log N). while there may be other potentially faster algorithms to achieve these properties, probably none can compete in compactness and simplicity of the implementation, of which the core code is about 90 lines of C in this case. the rest of the code provides a nice API around it, which should be familiar to users of my dynamic array implementation known as sblist. the version committed here is the bare minimum to provide the most important features of a list, extra features like those currently available in sblist like sorted insertion or binary search may be added in the future. the header file tlist.h provides comments explaining the functionality, and the included test file can probably serve as a good usage example. credits go to the author of the e-maxx.ru article describing the possibility of using a treap as a list with implicit indices instead of keys - as the typical use of a treap is as a key-value store, similar to a hashtable.
1 parent 321e1ee commit 5ae9008

File tree

3 files changed

+358
-0
lines changed

3 files changed

+358
-0
lines changed

include/tlist.h

+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#ifndef TLIST_H
2+
#define TLIST_H
3+
4+
#include <stddef.h>
5+
6+
/*
7+
tlist, aka treap list. (C) 2024 rofl0r.
8+
9+
the core's algorithm is based on the "implicit treap" described on
10+
e-maxx.ru.
11+
12+
tlist behaves like a dynamic array, but unlike a real array supports
13+
insertion and deletion with O(log N) performance characteristics.
14+
15+
indexing is also O(log N), so if you don't need fast insertion and
16+
deletion, only appending, picking a traditional dynamic array will be
17+
faster for your use case.
18+
19+
on a fast machine with plenty of cache, a traditional dynamic array
20+
will also be faster if you need to insert or remove some items, and
21+
the array is reasonably small (up to about 2k entries).
22+
though even there the tlist is quite competitive.
23+
for anything else, the tlist outperforms the traditional array by far.
24+
25+
its most appealing characteristic, aside from the above, is that it
26+
can be implemented in less than 100 lines.
27+
it comes at a cost though.
28+
the memory consumption per node is 24 bytes on 64bit machines, and 16
29+
on 32 bit, plus the size of the item being stored in it.
30+
31+
the list is initialized with the fixed size of a single item that
32+
needs to be stored - it could be a single integer, a struct,
33+
or a pointer.
34+
35+
on insertion, you pass a pointer to a single item, the object being
36+
pointed to is then copied into the node.
37+
38+
for non-fixed size data items such as strings, you'd use the size
39+
of a pointer for tlist_new(). then you need to allocate the strings
40+
yourself and insert a pointer to the string - i.e. a char**.
41+
the list can free the pointed to content automatically if you use
42+
the _deep suffixed functions.
43+
44+
the list can hold a maximum of UINT_MAX items.
45+
46+
note that unlike in my dynamic array implementation "sblist", functions
47+
taking an index receive it as second, not third argument.
48+
it seems more natural to first pass the list, then the index, then the
49+
value, as the index refers to the list.
50+
apart from that, the api is almost identical, which allows for a quick
51+
swap-out.
52+
*/
53+
54+
struct tlist;
typedef struct tlist tlist;

/* allocates a new, empty list prepared to store items of itemsize
   bytes each.
   returns NULL on resource exhaustion. */
struct tlist *tlist_new(unsigned itemsize);

/* returns the number of items/nodes currently in the list. */
size_t tlist_getsize(struct tlist* t);

/* returns a pointer to the data of the idx'th item in the list, or
   NULL if idx is equal to or greater than the list size.
   you have to cast the return value to a pointer to the type
   that you inserted. */
void *tlist_get(struct tlist* t, size_t idx);

/* inserts a copy of the item val points to at position idx; the item
   previously at idx and everything behind it move up by one.
   idx may be anything from 0 up to and including tlist_getsize(t).
   returns 1 on success, 0 otherwise (idx past the end of the list,
   or not enough ram). */
int tlist_insert(struct tlist* t, size_t idx, void* val);

/* appends a copy of the item val points to to the end of the list.
   returns 1 on success, 0 otherwise, just like tlist_insert. */
int tlist_append(struct tlist* t, void *val);

/* deletes the item at position idx.
   returns 1 on success, 0 otherwise (invalid index). */
int tlist_delete(struct tlist *t, size_t idx);
/* same as tlist_delete, but frees the stored pointer too - only use
   if you initialized the list with sizeof(pointer). */
int tlist_delete_deep(struct tlist *t, size_t idx);

/* removes and frees all items in the list, but not the list itself.
   the _deep variant also frees the pointer stored in each item, with
   the same restriction as tlist_delete_deep. */
void tlist_free_items(struct tlist *t);
void tlist_free_items_deep(struct tlist *t);

/* frees the list and all items in it - returns NULL so you can do
   mylist = tlist_free(mylist) instead of requiring 2 statements to
   have your list freed and nulled.
   the _deep variant also frees the pointer stored in each item. */
void* tlist_free(struct tlist *t);
void* tlist_free_deep(struct tlist *t);

/* this is just a debug function: it prints the node counts of the two
   subtrees below the root to stdout and returns how evenly they are
   split, in percent (100 means both sides are equally large).
   for an empty list nothing is printed and 1.0 is returned.
   it's not built-in by default because it prints stuff to stdout;
   it is only compiled when TLIST_TEST is defined. */
float tlist_getbalance(struct tlist *t);
98+
99+
#pragma RcB2 DEP "../src/tlist/*.c"
100+
101+
#endif
102+

src/tlist/tlist.c

+198
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
#include <stdlib.h>
2+
#include <string.h>
3+
#include "../../include/tlist.h"
4+
5+
/* <limits.h> is not included by this file, so provide the limit ourselves
   unless some other header already did. the value assumes that unsigned
   int is 32 bits wide. */
#if !defined(UINT_MAX)
# define UINT_MAX 0xffffffffU
#endif
8+
9+
/* advances the generator state stored in *seed and returns the next
   pseudo-random number, which is always in the range 0..0x7fffffff.
   all arithmetic is done on unsigned values, so it wraps around
   instead of overflowing. */
static int mrand(unsigned *seed)
{
	unsigned next = (*seed + 1) * 1103515245 + 12345 - 1;

	*seed = next;
	return (next + 1) & 0x7fffffff;
}
13+
14+
/* a single treap node. the payload is not a member of the struct: it is
   stored directly behind it, in the same allocation. */
struct item {
	unsigned prior;   /* heap priority, assigned once when the node is created */
	unsigned cnt;     /* number of nodes in the subtree rooted here, this one included */
	struct item *l;   /* left child, 0 if there is none */
	struct item *r;   /* right child, 0 if there is none */
};
typedef struct item *pitem;
19+
20+
/* number of nodes in the subtree rooted at it; an empty subtree
   (null pointer) counts as 0. */
static unsigned cnt(pitem it) {
	if (!it)
		return 0;
	return it->cnt;
}
23+
24+
/* recomputes the node count of it from the counts of its two children.
   does nothing for an empty subtree. */
static void upd_cnt(pitem it) {
	if (!it)
		return;
	it->cnt = 1 + cnt(it->l) + cnt(it->r);
}
28+
29+
/* joins the trees l and r into a single tree and stores its root in *t.
   all nodes of l keep their place in front of all nodes of r.
   the root with the greater priority becomes the root of the result
   (r wins a tie); the other tree is merged into the matching subtree. */
static void merge(pitem *t, pitem l, pitem r) {
	pitem top;

	if (!l) {
		top = r;
	} else if (!r) {
		top = l;
	} else if (l->prior > r->prior) {
		/* l stays on top, r can only go below its right edge */
		merge(&l->r, l->r, r);
		top = l;
	} else {
		/* r stays on top, l can only go below its left edge */
		merge(&r->l, l, r->l);
		top = r;
	}
	*t = top;
	upd_cnt(top);
}
38+
39+
/* cuts the tree t into two trees: *l receives the nodes whose position
   is less than key, *r the nodes at position key and above.
   add is the number of nodes in front of this subtree (0 for the
   root), so add plus the size of the left subtree is the position of
   t itself. */
static void split(pitem t, pitem *l, pitem *r, unsigned key, unsigned add) {
	if (t == 0) {
		*r = 0;
		*l = 0;
		return;
	}

	unsigned pos = add + cnt(t->l);

	if (key > pos) {
		/* t belongs to the left part, the cut is inside its right subtree */
		split(t->r, &t->r, r, key, pos + 1);
		*l = t;
	} else {
		/* t belongs to the right part, the cut is inside its left subtree */
		split(t->l, l, &t->l, key, add);
		*r = t;
	}
	upd_cnt(t);
}
51+
52+
/* looks up the node at position idx in the tree t and returns it, or
   a null pointer if there is no such position.
   add is the number of nodes in front of this subtree (0 for the root). */
static pitem getitem(pitem t, unsigned idx, unsigned add) {
	while (t) {
		unsigned pos = add + cnt(t->l);

		if (pos == idx)
			break;
		if (pos < idx) {
			/* continue on the right, everything up to t is in front of it */
			add = pos + 1;
			t = t->r;
		} else {
			t = t->l;
		}
	}
	return t;
}
61+
62+
/* puts the node n at position idx of the tree *t: the tree is cut in
   front of position idx, n is attached to the end of the front part,
   and the rear part is attached behind that. */
static void insert(pitem *t, pitem n, unsigned idx) {
	pitem front, rear, joined;

	split(*t, &front, &rear, idx, 0);
	merge(&joined, front, n);
	merge(t, joined, rear);
}
68+
69+
/* unlinks the node at position idx from the tree rooted at *t and
   repairs the node counts on the way back up.
   add is the number of nodes in front of this subtree (0 for the root).
   the node is only taken out of the tree, it is not freed - the caller
   has to hold its own pointer to it.
   if there is no node at position idx, the tree is left as it is.

   earlier versions special-cased a hit on a direct child, using
   UINT_MAX as the "no such child" position. that value is also a
   possible idx, in which case a missing child was dereferenced. the
   plain recursion below produces the same tree without a sentinel. */
static void remove(pitem *t, unsigned idx, unsigned add) {
	pitem cur = *t;

	if (!cur) return;

	unsigned cur_key = add + cnt(cur->l);

	if (cur_key == idx) {
		/* found: its merged children take its place. merge() already
		   updates the count of whatever ends up in *t. */
		merge(t, cur->l, cur->r);
		return;
	}
	if (cur_key < idx)
		remove(&cur->r, idx, cur_key + 1);
	else
		remove(&cur->l, idx, add);
	upd_cnt(cur);
}
94+
95+
static pitem new_item(void* value, unsigned valsz, unsigned *seed) {
96+
pitem n = malloc(sizeof(struct item) + valsz);
97+
if(!n) return n;
98+
memcpy(n+1, value, valsz);
99+
n->prior = mrand(seed);
100+
n->cnt = 1;
101+
n->l = n->r = 0;
102+
return n;
103+
}
104+
105+
/* the list handle handed out to users of the api. */
struct tlist {
	unsigned seed;       /* state of the generator the node priorities are drawn from */
	unsigned itemsize;   /* number of payload bytes copied into every node */
	struct item *root;   /* root of the treap, 0 while the list is empty */
};
110+
111+
struct tlist *tlist_new(unsigned itemsize) {
112+
struct tlist* new = malloc(sizeof (struct tlist));
113+
if(!new) return 0;
114+
new->seed = 385-1;
115+
new->itemsize = itemsize;
116+
new->root = 0;
117+
return new;
118+
}
119+
120+
/* returns the address of the payload, which is stored directly behind
   the node header, or a null pointer if it is null.
   without the check a null node would yield a bogus non-null address
   (and pointer arithmetic on a null pointer is undefined), so callers
   passing on a failed lookup could not report it. */
static void* data(pitem it) {
	return it ? it + 1 : 0;
}
123+
124+
size_t tlist_getsize(struct tlist* t) {
125+
return cnt(t->root);
126+
}
127+
128+
void* tlist_get(struct tlist* t, size_t idx) {
129+
return data(getitem(t->root, idx, 0));
130+
}
131+
132+
int tlist_insert(struct tlist* t, size_t idx, void *value) {
133+
if(idx > cnt (t->root)) return 0;
134+
pitem new = new_item(value, t->itemsize, &t->seed);
135+
if(!new) return 0;
136+
insert(&t->root, new, idx);
137+
return 1;
138+
}
139+
140+
int tlist_append(struct tlist* t, void *value) {
141+
return tlist_insert(t, cnt(t->root), value);
142+
}
143+
144+
static int tlist_delete_impl(struct tlist *t, size_t idx, int deep) {
145+
if(idx >= cnt (t->root)) return 0;
146+
pitem it = getitem(t->root, idx, 0);
147+
if(deep) free(data(it));
148+
remove(&t->root, idx, 0);
149+
free(it);
150+
return 1;
151+
}
152+
153+
/* deletes the item at position idx, leaving alone whatever the item
   may point to. returns 1 on success, 0 otherwise. */
int tlist_delete(struct tlist *t, size_t idx) {
	const int deep = 0;

	return tlist_delete_impl(t, idx, deep);
}
156+
157+
/* deletes the item at position idx using the deep mode of
   tlist_delete_impl. returns 1 on success, 0 otherwise. */
int tlist_delete_deep(struct tlist *t, size_t idx) {
	const int deep = 1;

	return tlist_delete_impl(t, idx, deep);
}
160+
161+
static void tlist_free_items_impl(struct tlist *t, int deep) {
162+
while(cnt(t->root)) tlist_delete_impl(t, 0, deep);
163+
}
164+
165+
/* removes and frees all items, the list itself stays usable. */
void tlist_free_items(struct tlist *t) {
	const int deep = 0;

	tlist_free_items_impl(t, deep);
}
168+
169+
/* removes and frees all items using the deep mode, the list itself
   stays usable. */
void tlist_free_items_deep(struct tlist *t) {
	const int deep = 1;

	tlist_free_items_impl(t, deep);
}
172+
173+
/* frees all items (deep is passed on) and then the list itself.
   always returns a null pointer.
   a null list is accepted and ignored, like free() does, so that
   running mylist = tlist_free(mylist) twice is harmless. */
static void* tlist_free_impl(struct tlist *t, int deep) {
	if (t) {
		tlist_free_items_impl(t, deep);
		free(t);
	}
	return 0;
}
178+
179+
/* frees the list together with its items. always returns a null
   pointer, to be assigned to the caller's list variable. */
void *tlist_free(struct tlist *t) {
	const int deep = 0;

	return tlist_free_impl(t, deep);
}
182+
183+
/* frees the list together with its items using the deep mode. always
   returns a null pointer, to be assigned to the caller's list variable. */
void *tlist_free_deep(struct tlist *t) {
	const int deep = 1;

	return tlist_free_impl(t, deep);
}
186+
187+
#ifdef TLIST_TEST
/* declared by hand, <stdio.h> is not included by this file */
extern int printf(const char *__restrict, ...);

/* debug helper, only available when TLIST_TEST is defined.
   prints the node counts of the root's left and right subtree and
   their difference, and returns 100 minus that difference expressed
   as a percentage of the list size.
   returns 1.0 without printing anything if the list is empty. */
float tlist_getbalance(struct tlist *t) {
	size_t n = tlist_getsize(t);

	if (n == 0) return 1.0;

	int l = cnt(t->root->l);
	int r = cnt(t->root->r);
	int diff = abs(l - r);

	printf("l %d, r %d, diff %d\n", l, r, diff);
	return 100.f - ((float)diff/(n/100.f));
}
#endif

tests/tlist_test.c

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#pragma RcB2 CPPFLAGS "-DTLIST_TEST"
2+
#include <stdlib.h>
3+
#include <time.h>
4+
#include <assert.h>
5+
#include <stdio.h>
6+
#include "../include/tlist.h"
7+
#include "../include/sblist.h"
8+
9+
#define N 20480
10+
#define USE_S 1
11+
#define USE_T 2
12+
int main(int argc, char** argv) {
13+
int mode;
14+
if(argc <= 1) mode = USE_S | USE_T;
15+
else mode = atoi(argv[1]);
16+
srand(time(0));
17+
int rnd;
18+
sblist* l = sblist_new(sizeof(int), 32);
19+
tlist* t = tlist_new(sizeof(int));
20+
for(int i = 0; i < N/2; ++i) {
21+
if(mode & USE_S) sblist_add(l, &i);
22+
if(mode & USE_T) tlist_append(t, &i);
23+
}
24+
for(int i = 0; i < N/2; ++i) {
25+
rnd = rand();
26+
if(mode & USE_S) sblist_insert(l, &i, sblist_getsize(l) == 0 ? 0 : rnd%sblist_getsize(l));
27+
if(mode & USE_T) tlist_insert(t, tlist_getsize(t) == 0 ? 0 : rnd%tlist_getsize(t), &i);
28+
}
29+
if(mode == (USE_S|USE_T))
30+
assert(sblist_getsize(l) == tlist_getsize(t));
31+
if(mode == (USE_S|USE_T))
32+
for(int i = 0; i < sblist_getsize(l); ++i) {
33+
assert(*(int*)tlist_get(t, i) == *(int*)sblist_get(l, i));
34+
}
35+
for(int i = 0; i < N/4; ++i) {
36+
rnd = rand();
37+
if(mode == (USE_S|USE_T))
38+
assert(sblist_getsize(l) == tlist_getsize(t));
39+
if(mode & USE_S)
40+
sblist_delete(l, sblist_getsize(l) == 0 ? 0 : rnd%sblist_getsize(l));
41+
if(mode & USE_T)
42+
tlist_delete(t, tlist_getsize(t) == 0 ? 0 : rnd%tlist_getsize(t));
43+
}
44+
for(int i = 0; i < N/8; ++i) {
45+
rnd = rand();
46+
if(mode & USE_S) sblist_insert(l, &i, sblist_getsize(l) == 0 ? 0 : rnd%sblist_getsize(l));
47+
if(mode & USE_T) tlist_insert(t, tlist_getsize(t) == 0 ? 0 : rnd%tlist_getsize(t), &i);
48+
}
49+
if(mode == (USE_S|USE_T))
50+
for(int i = 0; i < sblist_getsize(l); ++i) {
51+
assert(*(int*)tlist_get(t, i) == *(int*)sblist_get(l, i));
52+
}
53+
if(mode & USE_T) {
54+
printf("balance: %.4f%%\n", tlist_getbalance(t));
55+
tlist_free(t);
56+
}
57+
return 0;
58+
}

0 commit comments

Comments
 (0)