hn-classics/_stories/2005/10266440.md

28 KiB

Source

C++ vs OCaml: Ray tracer comparison

| ----- | | | |

| ----- | | | Home page | |

| ----- | |

C++ vs OCaml: Ray tracer comparison

Studying equivalently cutdown versions of our ray tracer written in C++ and OCaml is a great way to learn the differences between C++ and OCaml. This web page presents two versions of the same ray tracer, written in C++ and OCaml, and compares the code used to implement equivalent functionality in these two languages. The chosen ray tracer is particularly well suited to this task because it involves both integer and floating point operations, imperative-style loops, functional-style recursion, a non-trivial data structure (a tree) and many other interesting contructs that appear regularly in real programming.

Note that these programs are not intended to be optimised for performance. On the contrary, these programs are written in a straightforward, comprehensible style. However, we shall examine the performance of these programs at the end of this page as it is interesting to see how much the compilers can optimise these simple implementations.

Whole programs

Let's begin by stating the complete source codes of the 105-line C++ and 56-line OCaml programs:

|

C++ (download)

#include <list>
#include <iostream>
#include <limits>
#include <cmath>

using namespace std;

numeric_limits<double> real;
double delta = sqrt(real.epsilon()), infinity = real.infinity();

struct Vec {
  double x, y, z;
  Vec(double x2, double y2, double z2) : x(x2), y(y2), z(z2) {}
};
Vec operator+(const Vec &a;, const Vec &b;)
{ return Vec(a.x+b.x, a.y+b.y, a.z+b.z); }
Vec operator-(const Vec &a;, const Vec &b;)
{ return Vec(a.x-b.x, a.y-b.y, a.z-b.z); }
Vec operator*(double a, const Vec &b;) { return Vec(a*b.x, a*b.y, a*b.z); }
double dot(const Vec &a;, const Vec &b;) { return a.x*b.x + a.y*b.y + a.z*b.z; }
Vec unitise(const Vec &a;) { return (1 / sqrt(dot(a, a))) * a; }

typedef pair<double, Vec> Hit;

struct Ray {
  Vec orig, dir;
  Ray(const Vec &o;, const Vec &d;) : orig(o), dir(d) {}
};

struct Scene {
  virtual ~Scene() {};
  virtual Hit intersect(const Hit &, const Ray &) const = 0;
};

struct Sphere : public Scene {
  Vec center;
  double radius;

  Sphere(Vec c, double r) : center(c), radius(r) {}
  ~Sphere() {}

  double ray_sphere(const Ray &ray;) const {
    Vec v = center - ray.orig;
    double b = dot(v, ray.dir), disc = b*b - dot(v, v) + radius * radius;
    if (disc < 0) return infinity;
    double d = sqrt(disc), t2 = b + d;
    if (t2 < 0) return infinity;
    double t1 = b - d;
    return (t1 > 0 ? t1 : t2);
  }

  Hit intersect(const Hit &hit;, const Ray &ray;) const {
    double lambda = ray_sphere(ray);
    if (lambda >= hit.first) return hit;
    return Hit(lambda, unitise(ray.orig + lambda*ray.dir - center));
  }
};

typedef list<Scene *> Scenes;
struct Group : public Scene {
  Sphere bound;
  Scenes child;

  Group(Sphere b, Scenes c) : bound(b), child(c) {}
  ~Group() {
    for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
      delete *it;
  }

  Hit intersect(const Hit &hit;, const Ray &ray;) const {
    Hit hit2=hit;
    double l = bound.ray_sphere(ray);
    if (l >= hit.first) return hit;
    for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
      hit2 = (*it)->intersect(hit2, ray);
    return hit2;
  }
};

Hit intersect(const Ray &ray;, const Scene &s;)
{ return s.intersect(Hit(infinity, Vec(0, 0, 0)), ray); }

double ray_trace(const Vec &light;, const Ray &ray;, const Scene &s;) {
  Hit hit = intersect(ray, s);
  if (hit.first == infinity) return 0;
  double g = dot(hit.second, light);
  if (g >= 0) return 0.;
  Vec p = ray.orig + hit.first*ray.dir + delta*hit.second;
  return (intersect(Ray(p, -1. * light), s).first < infinity ? 0 : -g);
}

Scene *create(int level, const Vec &c;, double r) {
  Scene *s = new Sphere(c, r);
  if (level == 1) return s;
  Scenes child;
  child.push_back(s);
  double rn = 3*r/sqrt(12.);
  for (int dz=-1; dz<=1; dz+=2)
    for (int dx=-1; dx<=1; dx+=2)
      child.push_back(create(level-1, c + rn*Vec(dx, 1, dz), r/2));
  return new Group(Sphere(c, 3*r), child);
}

int main(int argc, char *argv[]) {
  int level = 6, n = 512, ss = 4;
  if (argc == 2) level = atoi(argv[1]);
  Vec light = unitise(Vec(-1, -3, 2));
  Scene *s(create(level, Vec(0, -1, 0), 1));
  cout << "P5n" << n << " " << n << "n255n";
  for (int y=n-1; y>=0; --y)
    for (int x=0; x<n; ++x) {
      double g=0;
      for (int dx=0; dx<ss; ++dx)
	for (int dy=0; dy<ss; ++dy) {
	  Vec dir(unitise(Vec(x+dx*1./ss-n/2., y+dy*1./ss-n/2., n)));
	  g += ray_trace(light, Ray(Vec(0, 0, -4), dir), *s);
	}
      cout << char(int(.5 + 255. * g / (ss*ss)));
    }
  delete s;
  return 0;
}

|

OCaml (download)

let delta = sqrt epsilon_float

type vec = {x:float; y:float; z:float}
let zero = {x=0.; y=0.; z=0.}
let ( *| ) s r = {x = s *. r.x; y = s *. r.y; z = s *. r.z}
let ( +| ) a b = {x = a.x +. b.x; y = a.y +. b.y; z = a.z +. b.z}
let ( -| ) a b = {x = a.x -. b.x; y = a.y -. b.y; z = a.z -. b.z}
let dot a b = a.x *. b.x +. a.y *. b.y +. a.z *. b.z
let length r = sqrt(dot r r)
let unitise r = 1. /. length r *| r

let ray_sphere orig dir center radius =
  let v = center -| orig in
  let b = dot v dir in
  let d2 = (b *. b -. dot v v +. radius *. radius) in
  if d2 < 0. then infinity else
  let d = sqrt d2 in
  let t1 = b -. d and t2 = b +. d in
  if t2>0. then if t1>0. then t1 else t2 else infinity

let rec intersect orig dir (l, _ as hit) (center, radius, scene) =
  match ray_sphere orig dir center radius, scene with
  | l', _ when l' >= l -> hit
  | l', [] -> l', unitise (orig +| l' *| dir -| center)
  | _, scenes -> intersects orig dir hit scenes
and intersects orig dir hit = function
  | [] -> hit
  | scene::scenes -> intersects orig dir (intersect orig dir hit scene) scenes

let light = unitise {x=1.; y=3.; z= -2.} and ss = 4

let rec ray_trace dir scene =
  let l, n = intersect zero dir (infinity, zero) scene in
  let g = dot n light in
  if g <= 0. then 0. else
    let p = l *| dir +| sqrt epsilon_float *| n in
    if fst (intersect p light (infinity, zero) scene) < infinity then 0. else g

let rec create level c r =
  let obj = c, r, [] in
  if level = 1 then obj else
  let a = 3. *. r /. sqrt 12. in
  let aux x' z' = create (level - 1) (c +| {x=x'; y=a; z=z'}) (0.5 *. r) in
c, 3. *. r, [obj; aux (-.a) (-.a); aux a (-.a); aux (-.a) a; aux a a]

let level, n =
  try int_of_string Sys.argv.(1), int_of_string Sys.argv.(2) with _ -> 9, 512

  let scene = create level {x=0.; y= -1.; z=4.} 1.;;

Printf.printf "P5n%d %dn255n" n n;;
for y = n - 1 downto 0 do
  for x = 0 to n - 1 do
    let g = ref 0. in
    for dx = 0 to ss - 1 do
      for dy = 0 to ss - 1 do
        let aux x d = float x -. float n /. 2. +. float d /. float ss in
        let dir = unitise {x=aux x dx; y=aux y dy; z=float n} in
        g := !g +. ray_trace dir scene
      done;
    done;
    let g = 0.5 +. 255. *. !g /. float (ss*ss) in
    Printf.printf "%c" (char_of_int (int_of_float g))
  done;
done

| |

Piece by piece

A side-by-side comparison of equivalent chunks of code is more instructive.

Header

The C++ program begins with a header:

| ----- | |

#include <vector>
#include <iostream>
#include <limits>
#include <cmath>
using namespace std;

| | |

The OCaml program needs no such header as all of the necessary libraries used are built in to the language in this case.

Numeric Constants

The ray tracer uses numerical values for the square root of the floating point epsilon (the smallest positive float which can be added to one to produce a number not equal to one) and floating point infinity.

In C++, floating point constants are obtained via inclusion of the "limits" header file. In OCaml, the floating point constants are built in.

| ----- | |

numeric_limits<double> dbl;
double delta = sqrt(dbl.epsilon()), infinity = dbl.infinity();

|

let delta = sqrt epsilon_float

| |

3D vectors

The ray tracer makes heavy use of 3D vector arithmetic, so the programs begin with definitions of 3D vectors and associated functions.

The C++ and OCaml programs implement 3D vector as a struct and a record, respectively:

| ----- | |

struct Vec {
  double x, y, z;
  Vec(double x2, double y2, double z2) : x(x2), y(y2), z(z2) {}
};

|

type vec = { x: float; y: float; z: float }

| |

In C++, such structs are most easily instantiated via a constructor. The OCaml language allows records to be instantiated directly and, therefore, does not require constructor definitions to be given.

Vector addition, subtraction, scaling, dot product and unitization are all implemented as simple functions in both C++ and OCaml:

| ----- | |

Vec operator+(const Vec &a;, const Vec &b;)
{ return Vec(a.x+b.x, a.y+b.y, a.z+b.z); }
Vec operator-(const Vec &a;, const Vec &b;)
{ return Vec(a.x-b.x, a.y-b.y, a.z-b.z); }
Vec operator*(double a, const Vec &b;) { return Vec(a*b.x, a*b.y, a*b.z); }
double dot(const Vec &a;, const Vec &b;) { return a.x*b.x + a.y*b.y + a.z*b.z; }
Vec unitise(const Vec &a;) { return (1 / sqrt(dot(a, a))) * a; }

|

let ( *| ) s r = {x = s *. r.x; y = s *. r.y; z = s *. r.z}
let ( +| ) a b = {x = a.x +. b.x; y = a.y +. b.y; z = a.z +. b.z}
let ( -| ) a b = {x = a.x -. b.x; y = a.y -. b.y; z = a.z -. b.z}

let dot a b = a.x *. b.x +. a.y *. b.y +. a.z *. b.z

let unitise r = (1. /. sqrt (dot r r)) *| r

| |

Both implementations use infix operators for vector addition, subtraction and scaling. However, as OCaml disallows overloading (as it conflicts with type inference), the operators are given slightly different names in the OCaml implementation.

We have explicitly asked for pass by reference in the C++ implementation. This is not necessary but performance will be very poor without this optimisation.

Ray tracing primitives

The primitives implemented in this ray tracer are rays, intersections (hits), spheres and groups. Spheres and groups are both types of scene, a group being a bounding sphere and a set of scenes.

We have deliberately chosen a variety of representations for these data. Intersections are represented by pairs, giving the parameter of the intersection and the surface normal at the point of intersection. Rays are represented by named pairs, containing the origin and direction vectors quantifying the ray. Spheres are represented by a floating point radius and center vector. Groups are represented by a bounding sphere and a list of child scenes.

The natural implementations of these in C++ are:

| ----- | | Entity | C++ | OCaml |
| Hit | An STL pair | A tuple |
| Ray | A struct | A record |
| Scene | Abstract base class and derived classes | A variant type |
| Sphere | Derived class | A record |

Let's look at each in turn. Firstly, a hit:

| ----- | |

typedef pair<double, Vec> Hit;

|

| |

In fact, neither implementation needs to explicitly define this type because an STL pair is already defined in the library and uses of a 2-tuple are inferred by OCaml. However, the type of an STL pair is so verbose and common in this program that it is worth factoring it out with a typedef.

Next, the definition of a ray:

| ----- | |

struct Ray {
  Vec orig, dir;
  Ray(const Vec &o;, const Vec &d;) : orig(o), dir(d) {}
};

|

| |

C++ requires the definition of a struct, its members, their types and its trivial constructor. In OCaml, the types are completely inferred and all values may be constructed as literals, so there is no need for any code.

Next, the definition of the scene tree:

| ----- | |

struct Scene {
  virtual ~Scene() {};
  ...
};

struct Sphere : public Scene {
  Vec center;
  double radius;
  Sphere(Vec c, double r) : center(c), radius(r) {}
  ~Sphere() {}
  ...
};

typedef list<Scene *> Scenes;
struct Group : public Scene {
  Sphere bound;
  Scenes child;
  Group(Sphere b, Scenes c) : bound(b), child(c) {}
  ~Group() {
    for (Scenes::const_iterator it=child.begin();
         it!=child.end(); ++it)
      delete *it;
  }
  ...
};

|

| |

Amazingly, there is no easy way to define a type in C++ which can be "one of these or one of these", e.g. a sphere or a group in this case. The old C equivalent is a tagged union, but these are rare in modern C++ programs. Instead, an inheritance hierarchy is built, with derived classes representing the different possibilities. This is a common (ab)use of object oriented programming.

Also, the scene type in OCaml simply refers to itself. This is not possible in C++ and, instead, the programmer must use the convoluted approach of handling pointers to instances of the abstract base class. The objects pointed to must therefore be explicitly allocated and deallocated, which is most easily done using destructors.

Again, OCaml's type inference obviates the need to write any code.

Primitive ray tracing functions

The ray tracer is based around two simple functions.

  • ray_sphere
    

Computes the distance along the ray to its intersection with a sphere, returning infinity if there is no intersection.

  • intersect
    

Finds the first intersection of the ray with the scene.

The implementations of the "ray_sphere" function are very similar:

| ----- | |

struct Sphere : public Scene {
  ...
  double ray_sphere(const Ray &ray;) const {
    Vec v = center - ray.orig;
    double b = dot(v, ray.dir), disc = b*b - dot(v, v) + radius * radius;
    if (disc < 0) return infinity;
    double d = sqrt(disc), t2 = b + d;
    if (t2 < 0) return infinity;
    double t1 = b - d;
    return (t1 > 0 ? t1 : t2);
  }
};

|

let ray_sphere orig dir center radius =
  let v = center -| orig in
  let b = dot v dir in
  let d2 = (b *. b -. dot v v +. radius *. radius) in
  if d2 < 0. then infinity else
  let d = sqrt d2 in
  let t1 = b -. d and t2 = b +. d in
  if t2>0. then if t1>0. then t1 else t2 else infinity

| |

There are only two differences. Firstly, the C++ implementation can return from any point in the function using the "return" keyword whereas the OCaml function must wrap the rest of the function in the "else" portion of an "if" expression. Secondly, the "ray_sphere" function is nested inside the "Scene" class in the C++ implementation whereas this function appears in the top level of the OCaml implementation.

The "intersect" function is more interesting:

| ----- | |

struct Scene {
  ...
  virtual Hit intersect(const Hit &, const Ray &) const = 0;
};

struct Sphere : public Scene {
  ...
  Hit intersect(const Hit &hit;, const Ray &ray;) const {
    double lambda = ray_sphere(ray);
    if (lambda >= hit.first) return hit;
    return Hit(lambda, unitise(ray.orig + lambda*ray.dir - center));
  }
};

struct Group : public Scene {
  ...
  Hit intersect(const Hit &hit;, const Ray &ray;) const {
    Hit hit2=hit;
    double l = bound.ray_sphere(ray);
    if (l >= hit.first) return hit;
    for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
      hit2 = (*it)->intersect(hit2, ray);
    return hit2;
  }
};

Hit intersect(const Ray &ray;, const Scene &s;)
{ return s.intersect(Hit(infinity, Vec(0, 0, 0)), ray); }

|

let rec intersect orig dir (l, _ as hit) (center, radius, scene) =
  match ray_sphere orig dir center radius, scene with
  | l', _ when l' >= l -> hit
  | l', [] -> l', unitise (orig +| l' *| dir -| center)
  | _, scenes -> intersects orig dir hit scenes
and intersects orig dir hit = function
  | [] -> hit

  | scene::scenes -> intersects orig dir (intersect orig dir hit scene) scenes

| |

In the C++, the function to accumulate a hit over the scene is a member of the "Scene" class and its implementation is split between the derived classes. The latter function is provided separately. In the OCaml, this is a single function.

These two different approaches to the distribution of codehave different advantages. The C++ approach groups code by the contructor they are related to. The OCaml approach groups code by the functions they are related to. Essentially, the C++ approach requires only localised changes to the program when the inheritance hierarchy is extended (by adding another derived class) whereas the OCaml approach requires only localised changes to the program when new functions are added.

There are several other interesting aspects to this function:

  • The OCaml implementation is purely functional
  • The C++ implementation requires the declaration of an explicit type for the "intersect" member function whereas OCaml infers the type.
  • The OCaml implementation uses pattern matching to dissect the scene tree whereas the C++ uses virtual table dispatch.
  • OCaml's higher-order function "List.fold_left" factors out the STL-iterator loop in C++.

Ray tracing

The core function of the ray tracer is, unsurprisingly, called "ray_trace". This function traces a ray from the camera into the scene, returning zero (black) if the ray does not intersect anything or the intersection point is on the dark side of the object or a second ray from the intersection point to the light intersects another object (which is casting a shadow on this intersection point). Otherwise, a greyscale value given by a simple diffuse lighting expression is returned.

The implementations of the "ray_trace" function are very similar:

| ----- | |

double ray_trace(const Vec &light;, const Ray &ray;, const Scene &s;) {
  Hit hit = intersect(ray, s);
  if (hit.first == infinity) return 0;
  double g = dot(hit.second, light);
  if (g >= 0) return 0.;
  Vec p = ray.orig + hit.first*ray.dir + delta*hit.second;
  return (intersect(Ray(p, -1. * light), s).first < infinity ? 0 : -g);
}

|

let rec ray_trace dir scene =
  let l, n = intersect zero dir (infinity, zero) scene in
  let g = dot n light in
  if g <= 0. then 0. else
    let p = l *| dir +| sqrt epsilon_float *| n in
    if fst (intersect p light (infinity, zero) scene) < infinity then 0. else g

| |

The first element (the parametric coordinate "lambda") of the pair "hit" is extracted using the "first" member function in the C++ and the "fst" function in OCaml.

Creating the Scene

The scene is represented by a tree data structure, leaf nodes of which are spheres and non-leaf nodes are groups of further nodes. Due to the recursive nature of this data structure, the scene is most easiy built recursively.

Both programs use a "create" function to build the scene. This function accepts as arguments the number of levels remaining and the radius and coordinates of the current sphere:

| ----- | |

Scene *create(int level, const Vec &c;, double r) {
  Scene *s = new Sphere(c, r);
  if (level == 1) return s;
  Scenes child;
  child.push_back(s);
  double rn = 3*r/sqrt(12.);
  for (int dz=-1; dz<=1; dz+=2)
    for (int dx=-1; dx<=1; dx+=2)
      child.push_back(create(level-1, c + rn*Vec(dx, 1, dz), r/2));
  return new Group(Sphere(c, 3*r), child);
}

|

let rec create level c r =
  let obj = c, r, [] in
  if level = 1 then obj else
    let a = 3. *. r /. sqrt 12. in
    let aux x' z' = create (level - 1) (c +| {x=x'; y=a; z=z'}) (0.5 *. r) in
    c, 3. *. r, [obj; aux (-.a) (-.a); aux a (-.a); aux (-.a) a; aux a a]

| |

There are also some interesting differences between these implementations:

  • The OCaml implementation is purely functional.
  • In C++, Sphere and Group objects must be allocated using the "new" keyword.
  • Native support for linked lists (including literals) in OCaml results in shorter code.

Main program

The main part of the program is responsible for extracting the optional command line argument (the number of levels of spheres), creating the scene and, tracing rays for each pixel in the output image and writing the results to stdout as a PGM file.

The C++ and OCaml implementations of the main part of the program are similar in size:

| ----- | |

int main(int argc, char *argv[]) {
  int level = 6, n = 512, ss = 4;
  if (argc == 2) level = atoi(argv[1]);
  Vec light = unitise(Vec(-1, -3, 2));
  Scene *s(create(level, Vec(0, -1, 0), 1));
  cout << "P5n" << n << " " << n << "n255n";
  for (int y=n-1; y>=0; --y)
    for (int x=0; x<n; ++x) {
      double g=0;
      for (int dx=0; dx<ss; ++dx)
	for (int dy=0; dy<ss; ++dy) {
	  Vec dir(unitise(Vec(x+dx*1./ss-n/2., y+dy*1./ss-n/2., n)));
	  g += ray_trace(light, Ray(Vec(0, 0, -4), dir), *s);
	}
      cout << char(int(.5 + 255. * g / (ss*ss)));
    }
  delete s;
  return 0;
}

|

Printf.printf "P5n%d %dn255n" n n;;
for y = n - 1 downto 0 do
  for x = 0 to n - 1 do
    let g = ref 0. in
    for dx = 0 to ss - 1 do
      for dy = 0 to ss - 1 do
        let aux x d = float x -. float n /. 2. +. float d /. float ss in
        let dir = unitise {x=aux x dx; y=aux y dy; z=float n} in
        g := !g +. ray_trace dir scene
      done;
    done;
    let g = 0.5 +. 255. *. !g /. float (ss*ss) in
    Printf.printf "%c" (char_of_int (int_of_float g))
  done;
done

| |

Again, there are some interesting differences between the two implementations:

  • The scene must be explicitly deallocated in C++ by deleting it, recursively invoking the destructors.
  • For the first time, the OCaml is not purely functional as it uses a reference.
  • The C++ uses some nifty (dangerous) tricks to implicitly cast numbers from integers to floating point values whereas the OCaml must explicitly perform such conversions using the built-in "float", "int_of_float" and "char_of_int" functions.
  • Conversion is factored out into a nested auxiliary function "aux" in the OCaml.

Performance

Ideally, the compiler will do all of the performance optimisation so that the programmer doesn't have to. Consequently, it is interesting to measure the performance of these simple ray tracer implementations in order to see how well the compilers optimise the programs.

We have measured performance on 32- and 64-bit architectures.

32-bit

On a 2.2GHz Athlon 64 running Debian Linux in a 32-bit chroot, the two programs can be compiled with (g++ 4.1.3 and ocamlopt 3.10.0):

g++ -O3 -ffast-math -funroll-all-loops -march=athlon ray.cpp -o ray
ocamlopt -rectypes -inline 1000 -ffast-math ray.ml -o ray

The following graph illustrates the time taken t to ray trace scenes of different complexities, where n is the number of levels of spheres, with the C++ program (red) and OCaml program (blue):

With two cross-overs, there is little difference between the performance of the OCaml and C++ implementations. Note that the C++ was compiled with CPU-specific optimizations whereas the OCaml binary will run on any 486 compatible.

64-bit

On a 2.2GHz Athlon 64 running Debian Linux, the two programs can be compiled with (g++ 4.1.4 and ocamlopt 3.10.0):

g++ -O3 -ffast-math -funroll-all-loops ray.cpp -o ray
ocamlopt -rectypes -inline 1000 ray.ml -o ray

The following graph illustrates the time taken t to ray trace scenes of different complexities, where n is the number of levels of spheres, with the C++ program (red) and OCaml program (blue):

Again, the performance curves cross over twice and there is little different between the speeds of the OCaml and C++ implementations.

Conclusions

The OCaml implementation of the ray tracer is 47% shorter than the C++ implementation. The brevity of the OCaml implementation can be attributed to several different factors:

  • Type inference removes the need to specify types explicitly over and over.
  • The object-oriented representation of a trivial sum type as an inheritance hierarchy of classes is needlessly inefficient.
  • No need to write constructors for tuples, records and variant types in OCaml.
  • Native lists in OCaml.
  • Higher-order functions instead of loops in OCaml.

The C++ and OCaml implementations of this ray tracer perform almost identically. However, note that the C++ implementation has been optimised by using pass by reference for structs. In contrast, the OCaml implementation is unoptimised.

This program has demonstrated some of the ways that OCaml improves upon C++.

The following webpages are derived from this work:

| ----- | | |

Read OCaml for Scientists now!

|

|

| ----- | | Flying Frog Consultancy Ltd., 2007 | Contact the webmaster |