// bit map arena virtual memory allocator
// using one bit of overhead per block
// When releasing memory, you must supply the block size. Under
// modern programming discipline, this should be available since
// you generally have to check OBJECT SIZE FOR OVERFLOW anyway.
// A standard malloc and free routine is conditionally supplied
// that remembers the block sizes if this is a problem.
// please report bugs located to the program author,
// karl_m@acm.org www.geocities.com/malbrain
// n.b. the semantics of valloc differ slightly from the standard
// unix V R4 definition: the memory allocated is aligned to
// the blocksize of the appropriate templated arena, not
// necessarily to the virtual memory system page size.
// Also, valloc requests must be released with vfree.
// That said, with the default templates given below,
// valloc requests for 4K or more will align
// on a page boundary.
#include <limits.h>
#include <stdlib.h>
#define NDEBUG
#include "assert.h"
#ifdef unix
#include <unistd.h>
#else
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
// each memory arena structure is followed by a bit map
// allocation table made of unsigned ints where the low
// order bit represents the lowest block number.
typedef struct Arena_ {
char *mem, *end; // beginning and end of arena allocation
void *next; // next arena in chain with same template
unsigned blksize; // block size for memory allocation
unsigned mapmax; // map size in ints immediately following
unsigned blkmax; // maximum block allocation in bytes
unsigned avail; // amount of available arena space
unsigned scan; // current scan offset
} Arena;
// the Arenas are arrayed with their bit maps into page frames;
// the first structure in the page frame describes the page
// new arenas are allocated from the end
typedef struct Page_ {
unsigned first; // offset of lowest arena allocated
unsigned size; // size of this page in bytes
void *prev; // previous page of arenas in chain
} Page;
#define MAP_BITS (CHAR_BIT * sizeof(unsigned))
#define FRAME_SIZE 8192
Page *PageLIFO;
// allocate and initialize a new page frame for arenas
Page *vframe (void)
{
unsigned xtra, size = FRAME_SIZE;
Page *page;
#ifdef unix
page = (Page *)sbrk (size);
// round brk value to page boundary
if( xtra = (unsigned)page & 0xfff )
sbrk (0x1000 - xtra), size += 0x1000 - xtra;
#else
page = (Page *)VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
#endif
page->first = page->size = size;
page->prev = PageLIFO;
PageLIFO = page;
return page;
}
// arena templates for three arena shapes, but feel
// free to add others to suit your own needs.
// overlap between blkmax and blksize can reduce
// the percentage of granularity induced waste,
// as illustrated in the 16 byte and 128 byte
// templates below.
typedef struct {
unsigned blksize; // allocation granularity
unsigned blkcnt; // number of blocks in the arena
unsigned blkmax; // maximum allocation to support
Arena *arena; // chain of arenas with template
Arena *scan; // current arena being allocated
} Template;
Template ArenaDefs[] = {
{16, 8192, 511 }, // 16 x 8K = 512K total
{128, 4096, 4095 }, // 128 x 4K = 512K total
{4096, 512, 65535 }, // 4K x 512 = 2MB total
{0} // 64K and above
};
// allocate and initialize a new arena
// marking an initial allocation
void *varena (size_t bytes, Template *tmpl)
{
unsigned xtra, blks, bits, blkmax, blksize, mapmax, mapbytes;
unsigned *map;
Arena *arena;
Page *page;
assert (bytes > 0);
// build the arena from a table template,
// or make a huge arena with two blocks
// of one half the total request each
if( blksize = tmpl->blksize ) {
blkmax = tmpl->blkmax;
blks = tmpl->blkcnt;
} else {
if( xtra = bytes & 0xfff ) // round for 4K virtual memory
bytes += 4096 - xtra;
assert (~bytes & 1);
blksize = bytes / 2;
blkmax = bytes - 1;
blks = 2;
}
// block count for the initial allocation
assert (blksize > 0);
bits = (bytes + blksize - 1);
bits /= blksize;
// round mapmax up to unsigned size multiple
// unless doing a huge block without map bits
assert (MAP_BITS > 2);
if( mapmax = blks / MAP_BITS )
if( blks & (MAP_BITS - 1) )
mapmax++;
mapbytes = mapmax * sizeof(unsigned);
// allocate new page on overflow of current frame
// or on startup
if( page = PageLIFO ) {
if( page->first < mapbytes + sizeof(Page) + sizeof(Arena) )
page = vframe();
} else
page = vframe();
// allocate the arena in the frame
page->first -= mapbytes;
map = (unsigned *)((char *)page + page->first);
page->first -= sizeof(Arena);
arena = (Arena *)((char *)page + page->first);
assert (page->first >= sizeof(Page));
// initialize and insert arena into template chain
memset (arena, 0, sizeof(Arena));
arena->avail = blks * blksize;
arena->blksize = blksize;
arena->mapmax = mapmax;
arena->blkmax = blkmax;
arena->next = tmpl->arena;
tmpl->arena = arena;
if( !arena->next )
tmpl->scan = arena;
// allocate the arena's memory block
#ifdef unix
arena->mem = sbrk (arena->avail);
// align allocation on page boundary
if( xtra = (unsigned)arena->mem & 0xfff )
sbrk (0x1000 - xtra), arena->mem += 0x1000 - xtra;
#else
arena->mem = VirtualAlloc(NULL, arena->avail, MEM_COMMIT, PAGE_READWRITE);
#endif
arena->end = arena->mem + arena->avail;
arena->avail -= bits * blksize;
if( mapbytes * CHAR_BIT < bits )
return arena->mem;
// clear the initial allocation of bits in the map
while( bits >= MAP_BITS )
*map++ = 0, bits -= MAP_BITS, mapmax--;
// and mark the rest of the map as available
if( mapmax-- )
*map++ = ~0U << bits;
else
return arena->mem;
while( mapmax-- )
*map++ = ~0U;
return arena->mem;
}
// mark a partial sub-byte map allocation of 7 or fewer blocks
// of non rightmost blocks
void *vmarkbyte (Arena *arena, unsigned char val, unsigned blks, unsigned bit)
{
unsigned *map = (unsigned *)(arena + 1) + arena->scan;
unsigned char mask = UCHAR_MAX >> (CHAR_BIT - blks);
unsigned block;
assert (blks && blks < CHAR_BIT);
assert (val);
// find the available run of ones in the byte
// ignoring the rightmost bit, and assuming
// the map byte is non zero
do val >>= 1, bit++, assert (bit < MAP_BITS);
while( val & mask ^ mask );
// clear the allocation bits
arena->avail -= blks * arena->blksize;
*map ^= (unsigned)mask << bit;
assert(!(*map & (unsigned)mask << bit));
block = arena->scan * MAP_BITS + bit;
return arena->mem + block * arena->blksize;
}
// allocate consecutive run of blocks from the map
// blks must be at least one
void *vmarkmap (Arena *arena, unsigned run, unsigned blks, unsigned bit)
{
unsigned *map = (unsigned *)(arena + 1) + arena->scan, mask;
char *mem = arena->mem;
unsigned block;
block = arena->scan * MAP_BITS + bit - run;
arena->avail -= blks * arena->blksize;
mem += block * arena->blksize;
// clear initial bits of hightest map allocation
blks += bit - run;
assert (blks > 0);
mask = ~0U >> MAP_BITS - blks;
if( run < bit ) {
bit -= run;
*map ^= mask ^= ((1 << bit) - 1);
assert (!(*map & mask));
return mem;
}
*map-- ^= mask;
assert (!(map[1] & mask));
run -= bit;
// clear preceeding run bits from map
while( run >= MAP_BITS )
assert (*map == ~0U), *map-- = 0, run -= MAP_BITS;
assert ((*map | ~0U >> run) == ~0U);
*map &= ~0U >> run;
return mem;
}
// scan tables built for contents of map bytes
#if CHAR_BIT != 8
// you'll have to construct your own tables
#else
// new run value: consecutive left side bits
// (zero high order bit means no run to left)
// (table also removes low order bit)
unsigned char ArenaRun[64] = {
1, 1, 1, 1, 1, 1, 1, 1, // 0x80 - 0x8f
1, 1, 1, 1, 1, 1, 1, 1, // 0x90 - 0x9f
1, 1, 1, 1, 1, 1, 1, 1, // 0xA0 - 0xAf
1, 1, 1, 1, 1, 1, 1, 1, // 0xB0 - 0xBf
2, 2, 2, 2, 2, 2, 2, 2, // 0xC0 - 0xCf
2, 2, 2, 2, 2, 2, 2, 2, // 0xD0 - 0xDf
3, 3, 3, 3, 3, 3, 3, 3, // 0xE0 - 0xEf
4, 4, 4, 4, 5, 5, 6, 7, // 0xF0 - 0xFf
};
// available bits to continue existing run of consecutive blocks
// (entries w/high order bit set are folded onto these values)
// (low order bit is removed, and 0xFF is also removed)
unsigned char ArenaGlom[64] = {
1, 2, 1, 3, 1, 2, 1, 4, // 0x00 - 0x0f
1, 2, 1, 3, 1, 2, 1, 5, // 0x10 - 0x1f
1, 2, 1, 3, 1, 2, 1, 4, // 0x20 - 0x2f
1, 2, 1, 3, 1, 2, 1, 6, // 0x30 - 0x3f
1, 2, 1, 3, 1, 2, 1, 4, // 0x40 - 0x4f
1, 2, 1, 3, 1, 2, 1, 5, // 0x50 - 0x5f
1, 2, 1, 3, 1, 2, 1, 4, // 0x60 - 0x6f
1, 2, 1, 3, 1, 2, 1, 7, // 0x70 - 0x7f
};
// available bits remaining after first zero
// (without redundant low order bit)
unsigned char ArenaAfter[128] = {
0, 1, 1, 2, 1, 1, 2, 3, // 0x00 - 0x0f
1, 1, 1, 2, 2, 2, 3, 4, // 0x10 - 0x1f
1, 1, 1, 2, 1, 1, 2, 3, // 0x20 - 0x2f
2, 2, 2, 2, 3, 3, 4, 5, // 0x30 - 0x3f
1, 1, 1, 2, 1, 1, 2, 3, // 0x40 - 0x4f
1, 1, 1, 2, 2, 2, 3, 4, // 0x50 - 0x5f
2, 2, 2, 2, 2, 2, 2, 3, // 0x60 - 0x6f
3, 3, 3, 3, 4, 4, 5, 6, // 0x70 - 0x7f
1, 1, 1, 2, 1, 1, 2, 3, // 0x80 - 0x8f
1, 1, 1, 2, 2, 2, 3, 4, // 0x90 - 0x9f
1, 1, 1, 2, 1, 1, 2, 3, // 0xA0 - 0xAf
2, 2, 2, 2, 3, 3, 4, 5, // 0xB0 - 0xBf
2, 2, 2, 2, 2, 2, 2, 3, // 0xC0 - 0xCf
2, 2, 2, 2, 2, 2, 3, 4, // 0xD0 - 0xDf
3, 3, 3, 3, 3, 3, 3, 3, // 0xE0 - 0xEf
4, 4, 4, 4, 5, 5, 6, 7, // 0xF0 - 0xFf
};
#endif
// scan an existing arena for available space
// processing the map in byte size pieces
// of int sized chunks using the tables
void *vscan (Arena *arena, size_t bytes)
{
unsigned chunk, *map = (unsigned *)(arena + 1);
unsigned run = 0, blks, bit;
unsigned char nxt;
blks = bytes + arena->blksize - 1;
assert (arena->blksize > 0);
if( blks /= arena->blksize )
do if( chunk = map[arena->scan] )
for( bit = 0; bit < MAP_BITS; bit += CHAR_BIT )
if( nxt = chunk >> bit ) // next byte of chunk
if( nxt < UCHAR_MAX ) // less than 8 blocks available ???
if( ~nxt & 1 || run + ArenaGlom[nxt/2 & 0x3f] < blks )
if( ArenaAfter[nxt/2] < blks ) // bits available in byte
if( nxt & 0x80 ) // establish new run
run = ArenaRun[nxt/2 & 0x3f];
else
run = 0; // no run possible without the leading bit
else // request of fewer than 8 blocks will fit
return vmarkbyte (arena, nxt, blks, bit);
else // 8 blocks or a run spanning two or more map bytes
return vmarkmap (arena, run, blks, bit);
else if( run + CHAR_BIT < blks )
run += CHAR_BIT;// 8 more blocks still not enough
else // or, run now fits
return vmarkmap (arena, run, blks, bit);
else
run = 0; // byte is all zero bits
else
run = 0; // chunk is all zero bits
while( ++arena->scan < arena->mapmax );
arena->scan = 0; // next time start scan from the beginning
return NULL;
}
// allocate a new block of size bytes
void *valloc (size_t bytes)
{
Template *tmpl = ArenaDefs;
unsigned blkmax, doit = 2;
Arena *start, *first;
void *mem;
// round request up to smallest Template blocksize
assert (tmpl->blksize > 0);
if( bytes < tmpl->blksize )
bytes = tmpl->blksize;
// find a suitable template, or default to the
// huge-block template at the end of the table
while( blkmax = tmpl->blkmax )
if( bytes > blkmax )
tmpl++;
else
break;
// scan existing arenas built under the template
// for available space, going through each arena
// once before giving up and building a new arena,
// but do the current allocating arena twice
if( first = tmpl->arena )
if( start = tmpl->scan ) do
if( tmpl->scan == start && !doit-- )
break;
else if( bytes < tmpl->scan->blksize )
continue;
else if( bytes > tmpl->scan->avail )
continue;
else if( tmpl->scan->mapmax ) // scan blocks with maps
if( mem = vscan(tmpl->scan, bytes) )
return mem;
else
continue;
else // huge arena, allocate by clearing available space
return tmpl->scan->avail = 0, tmpl->scan->mem;
while( tmpl->scan = tmpl->scan->next ? tmpl->scan->next : first );
// build a new arena and allocate space there
return varena (bytes, tmpl);
}
// starting with block number, set the
// map bits to make blocks available
void vsetmap (Arena *arena, unsigned block, unsigned blks)
{
unsigned mask, *map = (unsigned *)(arena + 1) + block / MAP_BITS;
unsigned max = arena->mapmax * MAP_BITS;
assert (blks <= max - block);
assert (block <= max);
assert (blks > 0);
if( block > max ) // ensure sanity of starting block no.
block = max;
if( blks > max - block ) // ditto for the block count
blks = max - block;
arena->avail += blks * arena->blksize;
// set block available bits in lowest map entry of the run
// (these are high order bits)
if( blks < MAP_BITS ) // less than full int of bits?
mask = ~0U >> (MAP_BITS - blks);
else
mask = ~0U;
block &= MAP_BITS - 1;
assert (!(*map & mask << block));
*map++ |= mask << block;
// calculate number of blks remaining
if( blks > MAP_BITS - block )
blks -= MAP_BITS - block;
else
return;
// set all block available bits for intermediate map blocks
while( blks > MAP_BITS )
assert (!*map), *map++ = ~0U, blks -= MAP_BITS;
// set block available bits in highest map block
// (these are low order bits)
assert (!(*map & ~0U >> (MAP_BITS - blks)));
*map |= ~0U >> (MAP_BITS - blks);
}
// release memory blocks into an arena
void vfree (void *mem, size_t bytes)
{
unsigned block, entry, blks;
Arena *arena;
Page *page;
// round request up to smallest Template blocksize
assert (ArenaDefs->blksize > 0);
if( bytes < ArenaDefs->blksize )
bytes = ArenaDefs->blksize;
// locate the correct arena
// by scanning all existing
// arena memory ranges
assert (PageLIFO);
if( page = PageLIFO )
entry = page->first;
else
return;
while( 1 )
if( entry < page->size )
if( arena = (Arena *)((char *)page + entry), (char *)mem < arena->mem )
entry += sizeof(Arena) + sizeof(unsigned) * arena->mapmax;
else if( (char *)mem < arena->end )
break;
else
entry += sizeof(Arena) + sizeof(unsigned) * arena->mapmax;
else if( assert(page->prev), page = page->prev )
entry = page->first;
else
return;
// huge blocks have no map, so we just mark
// the entire arena available and return
if( !arena->mapmax ) {
arena->avail = arena->blkmax + 1;
return;
}
// calculate the number of blocks in the request
assert (arena->blksize > 0);
blks = bytes + arena->blksize - 1;
blks /= arena->blksize;
// calculate the starting block number
// and set the available bits
block = ((char *)mem - (char *)arena->mem) / arena->blksize;
vsetmap (arena, block, blks);
}
// trim memory block to smaller new size
// blks is oldsize, bytes is newsize
void vtrim (void *mem, size_t blks, size_t bytes)
{
unsigned block, entry;
Arena *arena;
Page *page;
assert (blks >= bytes);
// round sizes up to smallest Template blocksize
assert (ArenaDefs->blksize > 0);
if( blks < ArenaDefs->blksize )
blks = ArenaDefs->blksize;
if( bytes < ArenaDefs->blksize )
bytes = ArenaDefs->blksize;
// locate the correct arena by scanning
// all existing arena memory ranges
assert (PageLIFO);
if( page = PageLIFO )
entry = page->first;
else
return;
while( 1 )
if( entry < page->size )
if( arena = (Arena *)((char *)page + entry), (char *)mem < arena->mem )
entry += sizeof(Arena) + sizeof(unsigned) * arena->mapmax;
else if( (char *)mem < arena->end )
break;
else
entry += sizeof(Arena) + sizeof(unsigned) * arena->mapmax;
else if( assert (page->prev), page = page->prev )
entry = page->first;
else
return;
// huge blocks have no map; do nothing to resize these.
if( !arena->mapmax )
return;
// calculate the number of blocks in the new size
assert (arena->blksize > 0);
bytes += arena->blksize - 1;
bytes /= arena->blksize;
// calculate the number of blocks in the original block, the
// starting block number, and the change in block counts
block = ((char *)mem - (char *)arena->mem) / arena->blksize;
blks += arena->blksize - 1;
blks /= arena->blksize;
// free extra blocks
if( blks > bytes )
vsetmap (arena, block + bytes, blks - bytes);
}
#ifdef DEFINEMALLOC
void *malloc (size_t bytes)
{
size_t *ans = valloc (bytes + sizeof(size_t));
*ans++ = bytes;
return ans;
}
void free (void *mem)
{
size_t *size;
if( size = (size_t *)mem )
size--, vfree (size, *size + sizeof(size_t));
}
size_t msizeof (void *mem)
{
size_t *size;
if( size = (size_t *)mem )
return *--size;
return 0;
}
void *calloc (size_t ele, size_t num)
{
size_t xtra;
void *ans;
if( xtra = ele % sizeof(unsigned) - 1 )
ele += sizeof(unsigned) - xtra;
ans = malloc (ele * num);
memset (ans, 0, ele * num);
return ans;
}
void *realloc (void *mem, size_t size)
{
size_t *old;
void *ans;
if( old = (size_t *)mem )
if( *--old >= size )
return vtrim (old, *old, size), *old = size, mem;
ans = malloc (size);
if( old )
memcpy (ans, mem, *old), vfree (old, *old + sizeof(size_t));
return ans;
}
#endif
#ifdef ARENASTANDALONE
#include <stdio.h>
FILE *Out = stdout;
FILE *In = stdin;
char *Buff[5000];
char Line[80000];
void blkaddrs (Arena *arena, unsigned idx)
{
char *mem = arena->mem + idx * MAP_BITS * arena->blksize;
unsigned *map = (unsigned *)(arena + 1) + idx;
unsigned bit;
for( bit = 0; bit < MAP_BITS; bit++ )
if( !(*map >> bit & 1) )
fprintf (stderr, "block %x not freed\n", mem + bit * arena->blksize);
}
int main (int argc, char **argv)
{
int usemalloc = 0;
int check = 0;
int count = 0;
int debug = 0;
int len, idx;
// process any program options present
if( argc > 1 )
if( argv[1][0] == '-' )
for( argc--, argv++; *++(*argv); )
switch( **argv | 0x20 ) {
case 'm': usemalloc++; break;
case 'd': debug++; break;
case 'c': check++; break;
}
// open input and output files if present
if( argc > 1 )
if( !(In = fopen (argv[1], "r")) )
return 1;
if( argc > 2 )
if( !(Out = fopen (argv[2], "w")) )
return 1;
// scramble the input as the test plan
while( fgets (Line, sizeof(Line), In) ) {
if( count < 5000 )
idx = count++;
else {
idx = (unsigned)rand() % count;
len = strlen(Buff[idx]);
if( debug )
fprintf(stderr, "free %.6d %x\n", len + 1, Buff[idx]);
fwrite (Buff[idx], len, 1, Out);
usemalloc ? free (Buff[idx]) : vfree (Buff[idx], len + 1);
}
len = strlen (Line);
Buff[idx] = usemalloc ? malloc (len + 1) : valloc (len + 1);
if( debug )
fprintf(stderr, "req %.6d %x\n", len + 1, Buff[idx]);
memcpy (Buff[idx], Line, len + 1);
}
while( count-- ) {
len = strlen (Buff[count]);
fwrite (Buff[count], len, 1, Out);
usemalloc ? free (Buff[count]) : vfree (Buff[count], len + 1);
if( debug )
fprintf(stderr, "free %.6d %x\n", len + 1, Buff[count]);
}
fclose (Out);
fclose (In);
// ensure all bits were reset in all arenas
if( check ) {
unsigned entry, idx;
Arena *arena;
Page *page;
if( page = PageLIFO ) do
for( entry = page->first; entry < page->size; entry += sizeof(Arena) + arena->mapmax * sizeof(unsigned) )
for( arena = (Arena *)((char *)page + entry), idx = 0; idx < arena->mapmax; idx++ )
if( ((unsigned *)(arena + 1))[idx] != ~0U )
blkaddrs(arena, idx);
while( page = page->prev );
}
}
#endif