see also:
  • This is a much more comprehensive comparison program and article, but without hard results
  • This contains some info about the V-table.
  • This detailed article on the matter

Here's a very simple program to try and measure the cost of using virtual functions.

The Rectangle and Square classes derive from Shape. The area() function call is made polymorphically.

The Rectangle_plain class does a poor-man's polymorphism. That is, it holds on to a flag to tell it whether it is a square or rectangle and does the right thing for you. This is probably not totally fair, since the fake polymorphism just adds a compare. Still, this shows a sort of worst case comparison.

Here are the results on an Intel Nocona Xeon 3.4 GHz in 32-bit mode. You'll want to compile it like this: icc -DDO_PLAIN_FUNCS -O3 vfunc.cpp

Compiler setup virtual us/call non-virtual us/call plain function us/call non-virtual speedup plain function speedup
icc -O0 .022 .018 .0062 1.2x 3.5x
icc -O2 .0184 .0135 .005 1.36x 31x
icc -O3 .0183 .0136 .0005 1.34x 37x
icc -O3 -prof_use .0184 .0138 .0006 1.33x 31x
icc -O3 -ip .0184 .0136 .0005 1.35x 36.8x
icc -O3 -ip -prof_use .0153 .0138 .0006 1.1x 25.5x
icc -O3 -ipo .0157 .0136 .0005 1.15x 31x
icc -O3 -ipo -prof_use .0136 .0137 .0005 1x 27x

Although this is a very small, contrived test...

  • I have been told that taking off the plain function test helps performance. I haven't been able to get back to the machine where I did the testing. But to try it, simply compile without the -DDO_PLAIN_FUNCS.
  • There is consistently about 20-36% more overhead for a virtual call.
  • Don't ask me why, but somehow -ipo helped a single file program (?!?). The compiler spits out... IPO: performing single-file optimizations. I guess IPO can help a single file as well.
  • PGO helped virtual function calls significantly!
  • -O3 -ip is no better than just -O3. But -O3 -ip -prof_use is measurably better than -O3 -ip / -O3. The profile data is helping the 'ip' work. This is a documented feature (I think the compiler makes inlining decisions based on PGO), but it seems strange that PGO would be needed on such a simple program.
  • Non-intuitively, on a single file, -O3 -ipo is superior to -O3 -ip.
  • Ditto for -O3 -ipo -prof_use vs. -O3 -ip -prof_use.

#include <time.h>

#include <cstdio>
#include <cstdlib>
#include <iostream>

using namespace std;

// Number of objects in each benchmark array; every timed pass walks all
// ITERS elements once.
#define ITERS 1000000

// Number of timed passes made over each array, so each benchmark performs
// ITERS * REPS calls in total.
#define REPS  100

// Non-virtual stand-in for the Shape hierarchy ("poor man's polymorphism"):
// one concrete class that remembers whether it was built as a square or as a
// rectangle and branches on that flag inside area().
class Rectangle_plain
{
  private:
    int height;
    int width;
    int is_square;  // 1 when built by the one-argument constructor, else 0

  public:
    // Builds a square with side h_in. width is initialised as well so that no
    // member is ever left indeterminate (e.g. when the object is copied).
    Rectangle_plain(int h_in)
      : height(h_in), width(h_in), is_square(1) {}

    // Builds an h_in by w_in rectangle.
    Rectangle_plain(int h_in, int w_in)
      : height(h_in), width(w_in), is_square(0) {}

    // Returns the area, choosing the formula from the is_square flag.
    // const, because it only reads the members.
    int area() const
    {
      if (is_square) return height*height;
      else           return height*width;
    }

    ~Rectangle_plain() {}
};

// Abstract base of the virtual-dispatch half of the benchmark. Concrete
// shapes supply area(); the virtual destructor lets them be deleted through
// a Shape pointer.
class Shape
{
  public:
    Shape()
    {
    }

    virtual ~Shape()
    {
    }

    // Implemented by each concrete shape.
    virtual int area() = 0;
};

// Shape with independent height and width; area() is reached through
// Shape's virtual interface.
class Rectangle : public Shape
{
  public:
    // Stores both sides.
    Rectangle(int h_in, int w_in)
      : Shape(), height(h_in), width(w_in)
    {
    }

    // height times width.
    int area()
    {
      return height * width;
    }

    ~Rectangle()
    {
    }

  private:
    int height, width;
};

// Shape with a single side length; area() is reached through Shape's
// virtual interface.
class Square : public Shape
{
  public:
    // Stores the side length.
    Square(int h_in)
      : Shape(), height(h_in)
    {
    }

    // Side length squared.
    int area()
    {
      return height * height;
    }

    ~Square()
    {
    }

  private:
    int height;
};

// Free-function baseline for the benchmark: returns the product of the two
// sides, with no object and no dispatch involved.
int compute_area(int h, int w)
{
    const int product = h * w;
    return product;
}

int main(void)
{
   long tot_area;
   clock_t start, end;

   Shape** ptrs = 
      (Shape**)malloc(ITERS*sizeof(Shape**));

   Rectangle_plain** ptrs_plain = 
      (Rectangle_plain**)malloc(ITERS*sizeof(Rectangle_plain**));

   /* Make up a random bunch of both virtual function-based and
    *  non-virtual function-based objects in a random list
    */
   for (long i = 0; i < ITERS; i++)
   {
      int which = (int) (2.0*rand()/(RAND_MAX+1.0));
      if (which)
      {  ptrs[i]       = new Square(which + 1);
         ptrs_plain[i] = new Rectangle_plain(which + 1);
      }
      else
      {  ptrs[i]       = new Rectangle(which + 1, which + 2);
         ptrs_plain[i] = new Rectangle_plain(which + 1, which + 2);
      }
   }
  
   tot_area = 0; 
   start = clock();
   for (long j = 0; j < REPS; j++)
   for (long i = 0; i < ITERS; i++)
   { tot_area += ptrs[i]->area();
   }
   end=clock();
   float calls_per_sec_vfunc = (float)(end-start)*1000000/CLOCKS_PER_SEC/ITERS/REPS;
   printf("Time for virtual functions: %f, %f ms/call checksum: %ld\n",
       (float)(end-start)/CLOCKS_PER_SEC,
       calls_per_sec_vfunc,
       tot_area);

   tot_area = 0; 
   start = clock();
   for (long j = 0; j < REPS; j++)
   for (long i = 0; i < ITERS; i++)
   {  tot_area += ptrs_plain[i]->area();
   }
   end=clock();
   float calls_per_sec_class = (float)(end-start)*1000000/CLOCKS_PER_SEC/ITERS/REPS;
   printf("Time for ordinary class functions: %f, %f ms/call checksum: %ld\n",
       (float)(end-start)/CLOCKS_PER_SEC,
       calls_per_sec_class,
       tot_area);

#ifdef DO_PLAIN_FUNCS
   tot_area = 0; 
   start = clock();
   for (long j = 0; j < REPS; j++)
   for (long i = 0; i < ITERS; i++)
   {  tot_area += compute_area(1, i+1);
   }
   end=clock();
   float calls_per_sec_func = (float)(end-start)*1000000/CLOCKS_PER_SEC/ITERS/REPS;
   printf("Time for ordinary functions: %f, %f ms/call %ld\n",
       (float)(end-start)/CLOCKS_PER_SEC,
       calls_per_sec_func,
       tot_area);
#endif

   cout << endl << "Non-virtuals ran " << calls_per_sec_vfunc / calls_per_sec_class << "x faster than virtuals" << endl;
#ifdef DO_PLAIN_FUNCS
   cout << "Plain calls ran " << calls_per_sec_vfunc / calls_per_sec_func << "x faster than virtuals" << endl;
#endif

   for (long i = 0; i < ITERS; i++)
   {  delete ptrs[i];
      delete ptrs_plain[i];
   }

   free(ptrs);
   free(ptrs_plain);
}

-- MattWalsh - 24 Nov 2004

Topic revision: r1 - 25 Nov 2004 - MattWalsh
 
This site is powered by the TWiki collaboration platformCopyright © 2008-2012 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki? Send feedback