// mt_allocator.h

// MT-optimized allocator -*- C++ -*-

// Copyright (C) 2003, 2004 Free Software Foundation, Inc.
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING.  If not, write to the Free
// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
// USA.

// As a special exception, you may use this file as part of a free software
// library without restriction.  Specifically, if other files instantiate
// templates or use macros or inline functions from this file, or you compile
// this file and link it with other files to produce an executable, this
// file does not by itself cause the resulting executable to be covered by
// the GNU General Public License.  This exception does not however
// invalidate any other reasons why the executable file might be covered by
// the GNU General Public License.

/** @file ext/mt_allocator.h
 *  This file is a GNU extension to the Standard C++ Library.
 *  You should only include this header if you are using GCC 3 or later.
 */

#define _MT_ALLOCATOR_H 1

#include <new>
#include <cstdlib>
#include <bits/functexcept.h>
#include <bits/gthr.h>
#include <bits/atomicity.h>

namespace __gnu_cxx
{
  /**
   *  This is a fixed size (power of 2) allocator which - when
   *  compiled with thread support - will maintain one freelist per
   *  size per thread plus a "global" one. Steps are taken to limit
   *  the per thread freelist sizes (by returning excess back to
   *  "global").
   *
   *  Further details: see the mt_allocator chapter of the libstdc++
   *  documentation.
   */
  template<typename _Tp>
    class __mt_alloc
    {
    public:
      typedef size_t                    size_type;
      typedef ptrdiff_t                 difference_type;
      typedef _Tp*                      pointer;
      typedef const _Tp*                const_pointer;
      typedef _Tp&                      reference;
      typedef const _Tp&                const_reference;
      typedef _Tp                       value_type;

      // Names the same allocator template instantiated for another
      // value type.
      template<typename _Tp1>
        struct rebind
        { typedef __mt_alloc<_Tp1> other; };

      // All state lives in static members, so constructing, copying
      // and converting allocator objects has nothing to do.
      __mt_alloc() throw()
      {
        // XXX
      }

      __mt_alloc(const __mt_alloc&) throw()
      {
        // XXX
      }

      template<typename _Tp1>
        __mt_alloc(const __mt_alloc<_Tp1>& obj) throw()
        {
          // XXX
        }

      ~__mt_alloc() throw() { }

      // Address of a (mutable) object.
      pointer
      address(reference __x) const
      { return &__x; }

      // Address of a constant object.
      const_pointer
      address(const_reference __x) const
      { return &__x; }

      // Largest element count whose byte size still fits in size_t.
      size_type
      max_size() const throw()
      { return size_t(-1) / sizeof(_Tp); }

      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 402. wrong new expression in [some_] allocator::construct
      // Copy-constructs __val into the raw storage at __p, always using
      // the global placement new.
      void
      construct(pointer __p, const _Tp& __val)
      { ::new(__p) _Tp(__val); }

      // Runs the destructor of the object at __p; storage is untouched.
      void
      destroy(pointer __p) { __p->~_Tp(); }

      // Returns storage for __n objects of type _Tp (defined below).
      pointer
      allocate(size_type __n, const void* = 0);

      // Gives back storage obtained from allocate() with the same __n
      // (defined below).
      void
      deallocate(pointer __p, size_type __n);

      // Variables used to configure the behavior of the allocator,
      // assigned and explained in detail below.
      struct _Tune
      {
        // Allocation requests (after round-up to power of 2) below
        // this value will be handled by the allocator. A raw
        // new/delete call will be used for requests larger than this
        // value.
        size_t  _M_max_bytes;

        // Size in bytes of the smallest bin (must be a power of 2).
        size_t  _M_min_bin;

        // In order to avoid fragmenting and minimize the number of
        // new() calls we always request new memory using this
        // value. Based on previous discussions on the libstdc++
        // mailing list we have chosen the value below.
        size_t  _M_chunk_size;

        // The maximum number of supported threads. Our Linux 2.4.18
        // reports 4070 in /proc/sys/kernel/threads-max
        size_t  _M_max_threads;

        // Each time a deallocation occurs in a threaded application
        // we make sure that there are no more than
        // _M_freelist_headroom % of used memory on the freelist. If
        // the number of additional records is more than
        // _M_freelist_headroom % of the freelist, we move these
        // records back to the global pool.
        size_t  _M_freelist_headroom;

        // Set to true forces all allocations to use new().
        bool    _M_force_new;

        // Default tuning.  _M_force_new is taken from the environment:
        // it is true whenever GLIBCXX_FORCE_NEW is set.
        explicit
        _Tune()
        : _M_max_bytes(128), _M_min_bin(8),
          _M_chunk_size(4096 - 4 * sizeof(void*)),
          _M_max_threads(4096), _M_freelist_headroom(10),
          _M_force_new(getenv("GLIBCXX_FORCE_NEW") ? true : false)
        { }

        // Fully explicit tuning; every member is taken from the
        // corresponding argument (including _M_force_new).
        explicit
        _Tune(size_t __maxb, size_t __minbin, size_t __chunk,
              size_t __maxthreads, size_t __headroom, bool __force)
        : _M_max_bytes(__maxb), _M_min_bin(__minbin), _M_chunk_size(__chunk),
          _M_max_threads(__maxthreads), _M_freelist_headroom(__headroom),
          _M_force_new(__force)
        { }
      };

      // Returns a copy of the options currently in effect.
      static const _Tune
      _S_get_options()
      { return _S_options; }

      // Replaces the options.  Silently ignored once initialization has
      // run, because the bins are sized from the options at that point.
      static void
      _S_set_options(_Tune __t)
      {
        if (!_S_init)
          _S_options = __t;
      }

    private:
      // We need to create the initial lists and set up some variables
      // before we can answer to the first request for memory.
#ifdef __GTHREADS
      static __gthread_once_t           _S_once;
#endif
      static bool                       _S_init;

      static void
      _S_initialize();

      // Configuration options.
      static _Tune                      _S_options;

      // Using short int as type for the binmap implies we are never
      // caching blocks larger than 65535 with this allocator
      typedef unsigned short int        _Binmap_type;
      static _Binmap_type*              _S_binmap;

      // Each requesting thread is assigned an id ranging from 1 to
      // _S_max_threads. Thread id 0 is used as a global memory pool.
      // In order to get constant performance on the thread assignment
      // routine, we keep a list of free ids. When a thread first
      // requests memory we remove the first record in this list and
      // store the address in a __gthread_key. When initializing the
      // __gthread_key we specify a destructor. When this destructor
      // (i.e. the thread dies) is called, we return the thread id to
      // the front of this list.
#ifdef __GTHREADS
      struct _Thread_record
      {
        // Points to next free thread id record. NULL if last record in list.
        _Thread_record* volatile        _M_next;

        // Thread id ranging from 1 to _S_max_threads.
        size_t                          _M_id;
      };

      static _Thread_record* volatile   _S_thread_freelist_first;
      static __gthread_mutex_t          _S_thread_freelist_mutex;
      static __gthread_key_t            _S_thread_key;

      static void
      _S_destroy_thread_key(void* __freelist_pos);
#endif

      static size_t
      _S_get_thread_id();

      // Header placed in front of every block.  While the block is free
      // it holds the freelist link; while it is handed out it holds the
      // owning thread id (threaded builds only).
      union _Block_record
      {
        // Points to the block_record of the next free block.
        _Block_record* volatile         _M_next;

#ifdef __GTHREADS
        // The thread id of the thread which has requested this block.
        size_t                          _M_thread_id;
#endif
      };

      struct _Bin_record
      {
        // An "array" of pointers to the first free block for each
        // thread id. Memory to this "array" is allocated in _S_initialize()
        // for _S_max_threads + global pool 0.
        _Block_record** volatile        _M_first;

#ifdef __GTHREADS
        // An "array" of counters used to keep track of the amount of
        // blocks that are on the freelist/used for each thread id.
        // Memory to these "arrays" is allocated in _S_initialize() for
        // _S_max_threads + global pool 0.
        size_t* volatile                _M_free;
        size_t* volatile                _M_used;

        // Each bin has its own mutex which is used to ensure data
        // integrity while changing "ownership" on a block.  The mutex
        // is initialized in _S_initialize().
        __gthread_mutex_t*              _M_mutex;
#endif
      };

      // An "array" of bin_records each of which represents a specific
      // power of 2 size. Memory to this "array" is allocated in
      // _S_initialize().
      static _Bin_record* volatile      _S_bin;

      // Actual value calculated in _S_initialize().
      static size_t                     _S_bin_size;
    };


  // Returns uninitialized storage for __n objects of type _Tp.
  //
  // Requests above _M_max_bytes (or every request when _M_force_new is
  // set) go straight to ::operator new.  Everything else is served
  // from the calling thread's freelist for the matching power-of-2
  // bin; an empty freelist is refilled either from the global pool
  // (thread id 0) or with a freshly allocated chunk.
  //
  // Throws std::bad_alloc if __n * sizeof(_Tp) would overflow or if
  // ::operator new fails.
  template<typename _Tp>
    typename __mt_alloc<_Tp>::pointer
    __mt_alloc<_Tp>::
    allocate(size_type __n, const void*)
    {
      // Although the test in __gthread_once() would suffice, we wrap
      // test of the once condition in our own unlocked check. This
      // saves one function call to pthread_once() (which itself only
      // tests for the once value unlocked anyway and immediately
      // returns if set)
      if (!_S_init)
        {
#ifdef __GTHREADS
          if (__gthread_active_p())
            __gthread_once(&_S_once, _S_initialize);
#endif
          if (!_S_init)
            _S_initialize();
        }

      // Reject element counts whose byte size does not fit in size_t;
      // otherwise the product below wraps and selects a bin that is
      // far too small.
      if (__n > this->max_size())
        std::__throw_bad_alloc();

      // Requests larger than _M_max_bytes are handled by new/delete
      // directly.
      const size_t __bytes = __n * sizeof(_Tp);
      if (__bytes > _S_options._M_max_bytes || _S_options._M_force_new)
        {
          void* __ret = ::operator new(__bytes);
          return static_cast<_Tp*>(__ret);
        }

      // Round up to power of 2 and figure out which bin to use.
      const size_t __which = _S_binmap[__bytes];
      const size_t __thread_id = _S_get_thread_id();

      // Find out if we have blocks on our freelist.  If so, go ahead
      // and use them directly without having to lock anything.
      const _Bin_record& __bin = _S_bin[__which];
      _Block_record* __block = NULL;
      if (__bin._M_first[__thread_id] == NULL)
        {
          // Every block carries a _Block_record header in front of the
          // user-visible bytes.
          const size_t __bin_size = ((_S_options._M_min_bin << __which)
                                     + sizeof(_Block_record));
          size_t __block_count = _S_options._M_chunk_size / __bin_size;

          // Are we using threads?
          // - Yes, check if there are free blocks on the global
          //   list. If so, grab up to __block_count blocks in one
          //   lock and change ownership. If the global list is
          //   empty, we allocate a new chunk and add those blocks
          //   directly to our own freelist (with us as owner).
          // - No, all operations are made directly to global pool 0
          //   no need to lock or change ownership but check for free
          //   blocks on global list (and if not add new ones) and
          //   get the first one.
#ifdef __GTHREADS
          if (__gthread_active_p())
            {
              __gthread_mutex_lock(__bin._M_mutex);
              if (__bin._M_first[0] == NULL)
                {
                  // No need to hold the lock when we are adding a
                  // whole chunk to our own list.
                  __gthread_mutex_unlock(__bin._M_mutex);

                  void* __v = ::operator new(_S_options._M_chunk_size);
                  __bin._M_first[__thread_id] = static_cast<_Block_record*>(__v);
                  __bin._M_free[__thread_id] = __block_count;

                  // __block_count blocks need only __block_count - 1
                  // links; one more step would write past the chunk.
                  --__block_count;
                  __block = __bin._M_first[__thread_id];
                  while (__block_count-- > 0)
                    {
                      char* __c = reinterpret_cast<char*>(__block) + __bin_size;
                      __block->_M_next = reinterpret_cast<_Block_record*>(__c);
                      __block = __block->_M_next;
                    }
                  __block->_M_next = NULL;
                }
              else
                {
                  // Is the number of required blocks greater than or
                  // equal to the number that can be provided by the
                  // global free list?
                  __bin._M_first[__thread_id] = __bin._M_first[0];
                  if (__block_count >= __bin._M_free[0])
                    {
                      // Take the whole global list.
                      __bin._M_free[__thread_id] = __bin._M_free[0];
                      __bin._M_free[0] = 0;
                      __bin._M_first[0] = NULL;
                    }
                  else
                    {
                      // Take the first __block_count blocks: walk to the
                      // last one we keep and cut the list after it.
                      __bin._M_free[__thread_id] = __block_count;
                      __bin._M_free[0] -= __block_count;
                      --__block_count;
                      __block = __bin._M_first[0];
                      while (__block_count-- > 0)
                        __block = __block->_M_next;
                      __bin._M_first[0] = __block->_M_next;
                      __block->_M_next = NULL;
                    }
                  __gthread_mutex_unlock(__bin._M_mutex);
                }
            }
          else
#endif
            {
              void* __v = ::operator new(_S_options._M_chunk_size);
              __bin._M_first[0] = static_cast<_Block_record*>(__v);

              // Same linking as above: count - 1 links for count blocks.
              --__block_count;
              __block = __bin._M_first[0];
              while (__block_count-- > 0)
                {
                  char* __c = reinterpret_cast<char*>(__block) + __bin_size;
                  __block->_M_next = reinterpret_cast<_Block_record*>(__c);
                  __block = __block->_M_next;
                }
              __block->_M_next = NULL;
            }
        }

      // Pop the first block off our freelist.
      __block = __bin._M_first[__thread_id];
      __bin._M_first[__thread_id] = __bin._M_first[__thread_id]->_M_next;
#ifdef __GTHREADS
      if (__gthread_active_p())
        {
          // Record the owner and keep the counters that deallocate()
          // uses for its headroom calculation up to date.
          __block->_M_thread_id = __thread_id;
          --__bin._M_free[__thread_id];
          ++__bin._M_used[__thread_id];
        }
#endif

      // The caller's storage starts right after the block header.
      char* __c = reinterpret_cast<char*>(__block) + sizeof(_Block_record);
      return static_cast<_Tp*>(static_cast<void*>(__c));
    }

  template<typename _Tp>
    deallocate(pointer __p, size_type __n)
      // Requests larger than _M_max_bytes are handled by operators
      // new/delete directly.
      const size_t __bytes = __n * sizeof(_Tp);
      if (__bytes > _S_options._M_max_bytes || _S_options._M_force_new)
	  ::operator delete(__p);
      // Round up to power of 2 and figure out which bin to use.
      const size_t __which = _S_binmap[__bytes];
      const _Bin_record& __bin = _S_bin[__which];

      char* __c = reinterpret_cast<char*>(__p) - sizeof(_Block_record);
      _Block_record* __block = reinterpret_cast<_Block_record*>(__c);
#ifdef __GTHREADS
      if (__gthread_active_p())
	  // Calculate the number of records to remove from our freelist:
	  // in order to avoid too much contention we wait until the
	  // number of records is "high enough".
	  const size_t __thread_id = _S_get_thread_id();

	  long __remove = ((__bin._M_free[__thread_id]
			    * _S_options._M_freelist_headroom)
			   - __bin._M_used[__thread_id]);
	  if (__remove > static_cast<long>(100 * (_S_bin_size - __which)
					   * _S_options._M_freelist_headroom)
	      && __remove > static_cast<long>(__bin._M_free[__thread_id]))
	      _Block_record* __tmp = __bin._M_first[__thread_id];
	      _Block_record* __first = __tmp;
	      __remove /= _S_options._M_freelist_headroom;
	      const long __removed = __remove;
	      while (__remove-- > 0)
		__tmp = __tmp->_M_next;
	      __bin._M_first[__thread_id] = __tmp->_M_next;
	      __bin._M_free[__thread_id] -= __removed;

	      __tmp->_M_next = __bin._M_first[0];
	      __bin._M_first[0] = __first;
	      __bin._M_free[0] += __removed;
	  // Return this block to our list and update counters and
	  // owner id as needed.

	  __block->_M_next = __bin._M_first[__thread_id];
	  __bin._M_first[__thread_id] = __block;
	  // Single threaded application - return to global pool.
	  __block->_M_next = __bin._M_first[0];
	  __bin._M_first[0] = __block;
  template<typename _Tp>
      if (_S_options._M_force_new)

      // Calculate the number of bins required based on _M_max_bytes.
      // _S_bin_size is statically-initialized to one.
      size_t __bin_size = _S_options._M_min_bin;
      while (_S_options._M_max_bytes > __bin_size)
	  __bin_size <<= 1;

      // Setup the bin map for quick lookup of the relevant bin.
      const size_t __j = (_S_options._M_max_bytes + 1) * sizeof(_Binmap_type);
      _S_binmap = static_cast<_Binmap_type*>(::operator new(__j));

      _Binmap_type* __bp = _S_binmap;
      _Binmap_type __bin_max = _S_options._M_min_bin;
      _Binmap_type __bint = 0;
      for (_Binmap_type __ct = 0; __ct <= _S_options._M_max_bytes; ++__ct)
          if (__ct > __bin_max)
              __bin_max <<= 1;
          *__bp++ = __bint;

      // Initialize _S_bin and its members.
      void* __v = ::operator new(sizeof(_Bin_record) * _S_bin_size);
      _S_bin = static_cast<_Bin_record*>(__v);

      // If __gthread_active_p() create and initialize the list of
      // free thread ids. Single threaded applications use thread id 0
      // directly and have no need for this.
#ifdef __GTHREADS
      if (__gthread_active_p())
	  const size_t __k = sizeof(_Thread_record) * _S_options._M_max_threads;
	  __v = ::operator new(__k);
          _S_thread_freelist_first = static_cast<_Thread_record*>(__v);

	  // NOTE! The first assignable thread id is 1 since the
	  // global pool uses id 0
          size_t __i;
          for (__i = 1; __i < _S_options._M_max_threads; ++__i)
	      _Thread_record& __tr = _S_thread_freelist_first[__i - 1];
              __tr._M_next = &_S_thread_freelist_first[__i];
              __tr._M_id = __i;

          // Set last record.
          _S_thread_freelist_first[__i - 1]._M_next = NULL;
          _S_thread_freelist_first[__i - 1]._M_id = __i;

	  // Make sure this is initialized.
          // Initialize per thread key to hold pointer to
          // _S_thread_freelist.
          __gthread_key_create(&_S_thread_key, _S_destroy_thread_key);

	  const size_t __max_threads = _S_options._M_max_threads + 1;
	  for (size_t __n = 0; __n < _S_bin_size; ++__n)
	      _Bin_record& __bin = _S_bin[__n];
	      __v = ::operator new(sizeof(_Block_record*) * __max_threads);
	      __bin._M_first = static_cast<_Block_record**>(__v);

	      __v = ::operator new(sizeof(size_t) * __max_threads);
              __bin._M_free = static_cast<size_t*>(__v);

	      __v = ::operator new(sizeof(size_t) * __max_threads);
              __bin._M_used = static_cast<size_t*>(__v);

	      __v = ::operator new(sizeof(__gthread_mutex_t));
              __bin._M_mutex = static_cast<__gthread_mutex_t*>(__v);

                // Do not copy a POSIX/gthr mutex once in use.
                __gthread_mutex_t __tmp = __GTHREAD_MUTEX_INIT;
                *__bin._M_mutex = __tmp;
              { __GTHREAD_MUTEX_INIT_FUNCTION(__bin._M_mutex); }

	      for (size_t __threadn = 0; __threadn < __max_threads;
		  __bin._M_first[__threadn] = NULL;
		  __bin._M_free[__threadn] = 0;
		  __bin._M_used[__threadn] = 0;
	for (size_t __n = 0; __n < _S_bin_size; ++__n)
	    _Bin_record& __bin = _S_bin[__n];
	    __v = ::operator new(sizeof(_Block_record*));
	    __bin._M_first = static_cast<_Block_record**>(__v);
	    __bin._M_first[0] = NULL;

      _S_init = true;

  template<typename _Tp>
#ifdef __GTHREADS
      // If we have thread support and it's active we check the thread
      // key value and return its id or if it's not set we take the
      // first record from _S_thread_freelist and sets the key and
      // returns it's id.
      if (__gthread_active_p())
          _Thread_record* __freelist_pos =
	  if (__freelist_pos == NULL)
	      // Since _S_options._M_max_threads must be larger than
	      // the theoretical max number of threads of the OS the
	      // list can never be empty.
              __freelist_pos = _S_thread_freelist_first;
              _S_thread_freelist_first = _S_thread_freelist_first->_M_next;

          return __freelist_pos->_M_id;
      // Otherwise (no thread support or inactive) all requests are
      // served from the global pool 0.
      return 0;

#ifdef __GTHREADS
  // Destructor registered for _S_thread_key.  __freelist_pos is the
  // _Thread_record that was stored under the key for the exiting
  // thread; its id becomes available for reuse.
  template<typename _Tp>
    void
    __mt_alloc<_Tp>::
    _S_destroy_thread_key(void* __freelist_pos)
    {
      // Return this thread id record to front of thread_freelist.
      __gthread_mutex_lock(&_S_thread_freelist_mutex);
      _Thread_record* __tr = static_cast<_Thread_record*>(__freelist_pos);
      __tr->_M_next = _S_thread_freelist_first;
      _S_thread_freelist_first = __tr;
      __gthread_mutex_unlock(&_S_thread_freelist_mutex);
    }
#endif

  // Two __mt_alloc objects of the same value type always compare
  // equal.
  template<typename _Tp>
    inline bool
    operator==(const __mt_alloc<_Tp>&, const __mt_alloc<_Tp>&)
    {
      return true;
    }

  // Counterpart of operator==: two __mt_alloc objects of the same
  // value type never compare unequal.
  template<typename _Tp>
    inline bool
    operator!=(const __mt_alloc<_Tp>&, const __mt_alloc<_Tp>&)
    {
      return false;
    }

  // Definitions of the static data members that exist in every
  // configuration (threaded or not).

  // False until initialization has completed; options can only be
  // replaced while this is still false.
  template<typename _Tp>
    bool
    __mt_alloc<_Tp>::_S_init = false;

  // Tuning parameters, default-constructed.
  template<typename _Tp>
    typename __mt_alloc<_Tp>::_Tune
    __mt_alloc<_Tp>::_S_options;

  // Lookup table from request size in bytes to bin index.
  template<typename _Tp>
    typename __mt_alloc<_Tp>::_Binmap_type*
    __mt_alloc<_Tp>::_S_binmap;

  // The bin records, one per power-of-2 size.
  template<typename _Tp>
    typename __mt_alloc<_Tp>::_Bin_record* volatile
    __mt_alloc<_Tp>::_S_bin;

  // Number of bins; starts at one and is raised during initialization.
  template<typename _Tp>
    size_t
    __mt_alloc<_Tp>::_S_bin_size = 1;

  // Definitions of the static data members that only exist when the
  // library is built with thread support.
  // Actual initialization in _S_initialize().
#ifdef __GTHREADS
  // Guards the one-time call of _S_initialize() via __gthread_once().
  template<typename _Tp>
    __gthread_once_t __mt_alloc<_Tp>::_S_once = __GTHREAD_ONCE_INIT;

  // Head of the list of unassigned thread id records.
  template<typename _Tp>
    typename __mt_alloc<_Tp>::_Thread_record*
    volatile __mt_alloc<_Tp>::_S_thread_freelist_first = NULL;

  // Per-thread key under which a thread's id record is stored.
  template<typename _Tp>
    __gthread_key_t __mt_alloc<_Tp>::_S_thread_key;

  // Protects _S_thread_freelist_first.  Statically initialized when
  // the platform offers an initializer macro; otherwise it is set up
  // at run time through __GTHREAD_MUTEX_INIT_FUNCTION.
  template<typename _Tp>
    __gthread_mutex_t
#ifdef __GTHREAD_MUTEX_INIT
    __mt_alloc<_Tp>::_S_thread_freelist_mutex = __GTHREAD_MUTEX_INIT;
#else
    __mt_alloc<_Tp>::_S_thread_freelist_mutex;
#endif
#endif
} // namespace __gnu_cxx
