package sim.app.heatbugs; import sim.engine.*; import sim.field.grid.*; /** This agent decreases evaporates and diffuses all the heat at each time step. */ public /*strictfp*/ class Diffuser implements Steppable { public void step(SimState state) { HeatBugs heatbugs = (HeatBugs)state; // Donald Knuth said that premature optimization is the root of all evil. // Well, not so in HeatBugs, where a huge proportion of time is spent just // in one single method. // // This method is where HeatBugs spends 90% of its time, so it's worthwhile // optimizing the daylights out of it. // Let's go through some variations of the diffusion portion (dumping the // evaportated, diffused stuff into heatbugs.valgrid2) in increasing speed. /* double average; for(int x=0;x< heatbugs.valgrid.getWidth();x++) for(int y=0;y< heatbugs.valgrid.getHeight();y++) { average = 0.0; // for each neighbor of that position for(int dx=-1; dx< 2; dx++) for(int dy=-1; dy<2; dy++) { // compute the toroidal position of the neighbor int xx = heatbugs.valgrid.tx(x+dx); int yy = heatbugs.valgrid.ty(y+dy); // compute average average += heatbugs.valgrid.field[xx][yy]; } average /= 9.0; // load the new value into HeatBugs.this.valgrid2 heatbugs.valgrid2.field[x][y] = heatbugs.evaporationRate * (heatbugs.valgrid.field[x][y] + heatbugs.diffusionRate * (average - heatbugs.valgrid.field[x][y])); } // ---------------------------------------------------------------------- // The first thing to note is that tx and ty are slower than stx and sty. // You use tx and ty when you expect to have to wrap toroidal values that are // way out there. stx and sty are fine if your toroidal values are never off // the board more than one width's worth in the x direction or one height's worth // in the y direction. // // Also, you don't want to compute getWidth() every loop, and CERTAINLY don't want // to compute getHeight() width times every loop! So now we can write: double average; final int _gridWidth = heatbugs.valgrid.getWidth(); final int _gridHeight = heatbugs.valgrid.getHeight(); for(int x=0;x< _gridWidth;x++) for(int y=0;y< _gridHeight;y++) { average = 0.0; // for each neighbor of that position for(int dx=-1; dx< 2; dx++) for(int dy=-1; dy<2; dy++) { // compute the toroidal position of the neighbor int xx = heatbugs.valgrid.stx(x+dx); int yy = heatbugs.valgrid.sty(y+dy); // compute average average += heatbugs.valgrid.field[xx][yy]; } average /= 9.0; // load the new value into HeatBugs.this.valgrid2 heatbugs.valgrid2.field[x][y] = heatbugs.evaporationRate * (heatbugs.valgrid.field[x][y] + heatbugs.diffusionRate * (average - heatbugs.valgrid.field[x][y])); } // ---------------------------------------------------------------------- // We set _gridWidth and _gridHeight to be final mostly to remind us that // they don't change. Although Java COULD take advantage of // them being final to improve optimization, right now it doesn't. The // final declaration is stripped out when compiling to bytecode. Oh well! // // Okay, so what's wrong with the new incarnation? Well, lots of variables // are being accessed via instances (like heatbugs.evaporationRate and // heatbugs.valgrid.field). Instance data lookups, even for data in YOUR // instance, is always much slower than locals. Let's make some locals. // locals are faster than instance variables final DoubleGrid2D _valgrid = heatbugs.valgrid; final double[][] _valgrid_field = heatbugs.valgrid.field; final double[][] _valgrid2_field = heatbugs.valgrid2.field; final int _gridWidth = heatbugs.valgrid.getWidth(); final int _gridHeight = heatbugs.valgrid.getHeight(); final double _evaporationRate = heatbugs.evaporationRate; final double _diffusionRate = heatbugs.diffusionRate; double average; for(int x=0;x< _gridWidth;x++) for(int y=0;y< _gridHeight;y++) { average = 0.0; // for each neighbor of that position for(int dx=-1; dx< 2; dx++) for(int dy=-1; dy<2; dy++) { // compute the toroidal position of the neighbor int xx = _valgrid.stx(x+dx); int yy = _valgrid.sty(y+dy); // compute average average += _valgrid_field[xx][yy]; } average /= 9.0; // load the new value into HeatBugs.this.valgrid2 _valgrid2_field[x][y] = _evaporationRate * (_valgrid_field[x][y] + _diffusionRate * (average - _valgrid_field[x][y])); } // ---------------------------------------------------------------------- // That was a BIG jump in speed! Now we're getting somewhere! // Next, Java's array lookups work like this. If you say foo[x][y], it // first looks up the array value foo[x] (which is a one-dimensional array // in of itself). Then it looks up that array[y]. This is TWO array bounds // checks and you have to load the arrays into cache. There's a significant // hit here. We can get an almost 2x speedup if we keep the arrays around // that we know we need. // // We'll do it as follows. We keep around _valgrid[x] and _valgrid2[x] and // just look up the [y] values in them when we need to. Also since we're // diffusing, we need the _valgrid[x-1] and _valgrid[x+1] rows also. We'll // call these: _valgrid[x] -> _current // _valgrid[x-1] -> _past // _valgrid[x+1] -> _next // _valgrid2[x] -> _put // // Note that next iteration around, _current becomes _past and _next becomes // _current. So we only have to look up the new _next and the new _put. // We'll take advantage of that too. // Last, we'll get rid of the inner for-loop and just hand-code the nine // lookups for the diffuser. */ // locals are faster than instance variables final DoubleGrid2D _valgrid = heatbugs.valgrid; final double[][] _valgrid_field = heatbugs.valgrid.field; final double[][] _valgrid2_field = heatbugs.valgrid2.field; final int _gridWidth = _valgrid.getWidth(); final int _gridHeight = _valgrid.getHeight(); final double _evaporationRate = heatbugs.evaporationRate; final double _diffusionRate = heatbugs.diffusionRate; double average; double[] _past = _valgrid_field[_valgrid.stx(-1)]; double[] _current = _valgrid_field[0]; double[] _next; double[] _put; // for each x and y position for(int x=0;x< _gridWidth;x++) { _next = _valgrid_field[_valgrid.stx(x+1)]; _put = _valgrid2_field[_valgrid.stx(x)]; for(int y=0;y< _gridHeight;y++) { // for each neighbor of that position // go across top average = (_past[_valgrid.sty(y-1)] + _past[_valgrid.sty(y)] + _past[_valgrid.sty(y+1)] + _current[_valgrid.sty(y-1)] + _current[_valgrid.sty(y)] + _current[_valgrid.sty(y+1)] + _next[_valgrid.sty(y-1)] + _next[_valgrid.sty(y)] + _next[_valgrid.sty(y+1)]) / 9.0; // load the new value into HeatBugs.this.valgrid2 _put[y] = _evaporationRate * (_current[y] + _diffusionRate * (average - _current[y])); } // swap elements _past = _current; _current = _next; } // ---------------------------------------------------------------------- // If you have a multiprocessor machine, you can speed this up further by // dividing the work among two processors. We do that over in ThreadedDiffuser.java // // You can also avoid some of the array bounds checks by using linearized // double arrays -- that is, using a single array but computing the double // array location yourself. That way you only have one bounds check instead // of two. This is how, for example, Repast does it. This is certainly a // little faster than two checks. We use a two-dimensional array because a // linearized array class is just too cumbersome to use in Java right now, // what with all the get(x,y) and set(x,y,v) instead of just saying foo[x][y]. // Plus it turns out that for SMALL (say 100x100) arrays, the double array is // actually *faster* because of cache advantages. // // At some point in the future Java's going to have to fix the lack of true // multidimensional arrays. It's a significant speed loss. IBM has some proposals // in the works but it's taking time. However their proposals are for array classes. // So allow me to suggest how we can do a little syntactic sugar to make that prettier. // The array syntax for multidimensional arrays should be foo[x,y,z] and for // standard Java arrays it should be foo[x][y][z]. This allows us to mix the two: // a multidimensional array of Java arrays for example: foo[x,y][z]. Further we // should be allowed to linearize a multidimensional array, accessing all the elements // in row-major order. The syntax for a linearized array simply has empty commas: // foo[x,,] // oh yeah, we have one last step. // now finally copy HeatBugs.this.valgrid2 to HeatBugs.this.valgrid, and we're done _valgrid.setTo(heatbugs.valgrid2); } }