/* TAGS: cpp ext fifo big */ /* VERIFY_OPTS: -o nofail:malloc */ /* CC_OPTS: */ #include #include #include #include #ifdef __divine__ #include #include // V: ok.sc@safety // v: ok.tso4@safety V_OPT: --relaxed-memory tso:4 // v: ok.tso16@safety V_OPT: --relaxed-memory tso:16 // V: ok.sc@local V_OPT: --nontermination local // v: ok.tso4@local V_OPT: --nontermination local --relaxed-memory tso:4 // v: ok.tso16@local V_OPT: --nontermination local --relaxed-memory tso:16 // V: ok.sc@global V_OPT: --nontermination global ERR: .* // v: ok.tso4@global V_OPT: --nontermination global --relaxed-memory tso:4 ERR: .* // v: ok.tso16@global V_OPT: --nontermination global --relaxed-memory tso:16 ERR: .* // V: testbug.sc@safety CC_OPT: -DTESTBUG // v: testbug.tso4@safety V_OPT: --relaxed-memory tso:4 CC_OPT: -DTESTBUG // v: testbug.tso16@safety V_OPT: --relaxed-memory tso:16 CC_OPT: -DTESTBUG // V: testbug.sc@local V_OPT: --nontermination local ERR: error trace: function return CC_OPT: -DTESTBUG // v: testbug.tso4@local V_OPT: --nontermination local --relaxed-memory tso:4 ERR: error trace: function return CC_OPT: -DTESTBUG // v: testbug.tso16@local V_OPT: --nontermination local --relaxed-memory tso:16 ERR: error trace: function return CC_OPT: -DTESTBUG // V: testbug.sc@global V_OPT: --nontermination global ERR: .* CC_OPT: -DTESTBUG // v: testbug.tso4@global V_OPT: --nontermination global --relaxed-memory tso:4 ERR: .* CC_OPT: -DTESTBUG // v: testbug.tso16@global V_OPT: --nontermination global --relaxed-memory tso:16 ERR: .* CC_OPT: -DTESTBUG __attribute__((constructor)) static void disbaleMallocFail() { __dios_configure_fault( _DiOS_SF_Malloc, _DiOS_FC_NoFail ); } #endif const int cacheLine = 64; /** * A simple queue (First-In, First-Out). Concurrent access to the ends of the * queue is supported -- a thread may write to the queue while another is * reading. Concurrent access to a single end is, however, not supported. 
 *
 * The NodeSize parameter defines a size of single block of objects. By
 * default, we make the node a page-sized object -- this seems to work well in
 * practice. We rely on the allocator to align the allocated blocks reasonably
 * to give good cache usage. */
template< typename T,
          int NodeSize = (512 - cacheLine - sizeof(int) - sizeof(void*)) / sizeof(T) >
struct Fifo {
protected:

    // the Node layout puts read and write counters far apart to avoid
    // them sharing a cache line, since they are always written from
    // different threads
    struct Node {
        T *read;                                // consumer cursor, touched only by the reader
        char padding[ cacheLine - sizeof(int) ];
        T buffer[ NodeSize ];                   // payload slots; [read, write) holds live items
        std::atomic< T * > write;               // producer cursor, published with release stores
        Node *next;                             // link to the younger node; 0 on the tail node

        // a fresh node is empty: both cursors start at the buffer
        Node() {
            read = write = buffer;
            next = 0;
        }
    };

    // pad the fifo object to ensure that head/tail pointers never
    // share a cache line with anyone else
    char _padding1[cacheLine-2*sizeof(Node*)];
    Node *head;                                 // oldest node, owned by the consumer
    char _padding2[cacheLine-2*sizeof(Node*)];
    std::atomic< Node * > tail;                 // youngest node, advanced by the producer
    char _padding3[cacheLine-2*sizeof(Node*)];

public:
    // start with a single empty node shared by head and tail
    Fifo() {
        head = tail = new Node();
        assert( empty() );
    }

    // copying a fifo is not allowed
    Fifo( const Fifo & ) = delete;

    // Frees the whole node chain; assumes no other thread touches the
    // queue any more (only relaxed loads of tail are used).
    // NOTE(review): termsec::CheckFunction presumably marks this scope for
    // the --nontermination (@local/@global) verification variants declared
    // at the top of the file -- confirm against its DIVINE header.
    ~Fifo() {
        termsec::CheckFunction check;
        while ( head != tail.load( std::memory_order_relaxed ) ) {
            Node *next = head->next;
            assert( next != 0 );
            delete head;
            head = next;
        }
        delete head;
    }

    // Producer side: append a copy of x.  If the current tail node is full,
    // the element goes into a fresh node which is filled *first* and only
    // then linked in and published (release stores), so the consumer never
    // observes a half-initialised node or slot.
    void push( const T&x ) {
        Node *t;
        auto *ltail = tail.load( std::memory_order_acquire );
        if ( ltail->write.load( std::memory_order_acquire ) == ltail->buffer + NodeSize )
            t = new Node();
        else
            t = ltail;

        *t->write = x;                                       // fill the slot...
        t->write.fetch_add( 1, std::memory_order_release );  // ...then publish it

        if ( ltail != t ) {   // we started a new node: link it and move tail
            ltail->next = t;
            tail.store( t, std::memory_order_release );
        }
    }

    // True iff there is nothing to pop: a single node whose read cursor has
    // caught up with the write cursor.
    bool empty() {
        return head == tail.load( std::memory_order_relaxed )
            && head->read >= head->write.load( std::memory_order_relaxed );
    }

    // Number of queued items, summed over all nodes.  Uses relaxed loads,
    // so with a concurrently active producer this is only a snapshot.
    int size() {
        int size = 0;
        Node *n = head;
        termsec::CheckFunction check;   // see the NOTE on the destructor
        do {
            size += n->write.load( std::memory_order_relaxed ) - n->read;
            n = n->next;
        } while (n);
        return size;
    }

    // Unlink and free the exhausted head node (consumer side only).
    void dropHead() {
        Node *old = head;
        head = head->next;
        assert( !!head );
        delete old;
    }

    // Consumer side: discard the front element; the queue must be non-empty.
    void pop() {
        assert( !empty() );
        ++ head->read;
        if ( head->read == head->buffer + NodeSize ) {
            if ( head != tail.load( std::memory_order_relaxed ) ) {
                dropHead();
            }
        }
        termsec::CheckFunction check;
        // the following can happen when head->next is 0 even though head->read
        // has reached NodeSize, *and* no front() has been called in the meantime
        if ( head != tail.load( std::memory_order_relaxed )
             && head->read > head->buffer + NodeSize ) {
            dropHead();
            pop();
        }
    }

    // Consumer side: reference to the oldest element.  With wait == true,
    // spin until the producer delivers something; with the default, the
    // queue must already be non-empty.
    T &front( bool wait = false ) {
        {
            termsec::CheckFunction check;
            while ( wait && empty() ) ;
        }
        assert( !!head );
        assert( !empty() );
        // last pop could have left us with empty queue exactly at an
        // edge of a block, which leaves head->read == NodeSize
        if ( head->read == head->buffer + NodeSize ) {
            dropHead();
        }
        return *head->read;
    }
};

///////////////////////////////

// Test driver: one producer thread (push below), main() is the consumer.
const int B = 2;                      // node size of the queue under test
const int L = 5;                      // soft bound on in-flight items
const int N = 2;                      // values 42 .. 42+N-2 are pushed cyclically
using Q = Fifo< int, B >;
Q *q = nullptr;
std::atomic< int > length( 0 );       // producer/consumer in-flight counter

// Producer thread: keep pushing while fewer than L items are accounted for.
void *push( void * ) {
    int i = 0;
    try {
        while ( true )
            if ( length < L ) {
                length.fetch_add( 1, std::memory_order_relaxed );
                q->push( 42 + i );
                i = (i + 1) % (N - 1);
            }
    } catch (...) {}
    return nullptr;
};

// Consumer: forever checks that values arrive in FIFO order and that the
// counter stays consistent.  The code after the infinite loop is
// intentionally unreachable.
int main() {
    q = new Q;
    pthread_t p;
    pthread_create( &p, nullptr, &push, nullptr );
    while (true)
        for ( int i = 0; i < N - 1; ++i ) {
            int got = q->front( true );
            assert( 42 + i == got );
            assert( 1 <= length.load( std::memory_order_relaxed ) );
            q->pop();
#ifdef TESTBUG
            // Deliberately injected bug for the testbug.* variants declared
            // at the top of the file: the memory-order constant is passed as
            // the amount to subtract (instead of 1), so `length` is not
            // decremented correctly.  Do NOT "fix" this.
            length.fetch_sub( std::memory_order_relaxed );
#else
            length.fetch_sub( 1, std::memory_order_relaxed );
#endif
        }
    pthread_join( p, nullptr );
    delete q;
    return 0;
}