From 88f1aa7075148316029362517d593abc53452823 Mon Sep 17 00:00:00 2001 From: runge Date: Sun, 27 Jun 2004 02:54:50 +0000 Subject: x11vnc: speed up scaling a bit, add no blending option to -scale --- x11vnc/ChangeLog | 4 + x11vnc/x11vnc.c | 289 +++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 199 insertions(+), 94 deletions(-) diff --git a/x11vnc/ChangeLog b/x11vnc/ChangeLog index 7cc81ce..95ff7e9 100644 --- a/x11vnc/ChangeLog +++ b/x11vnc/ChangeLog @@ -1,3 +1,7 @@ +2004-06-27 Karl Runge + * speed up scaling a bit for slow machines (still all floating point) + * add no blending option (-scale fraction:nb) + 2004-06-26 Karl Runge * add -scale fract for global (not per-client) server-side scaling working more or less OK, needs to be optimized at some point. diff --git a/x11vnc/x11vnc.c b/x11vnc/x11vnc.c index 6c27e88..131dd29 100644 --- a/x11vnc/x11vnc.c +++ b/x11vnc/x11vnc.c @@ -193,6 +193,7 @@ int rfb_bytes_per_line; /* scaling info */ int scaling = 0; +int scaling_noblend = 0; double scale_fac = 1.0; int scaled_x = 0, scaled_y = 0; @@ -3736,11 +3737,14 @@ void initialize_screen(int *argc, char **argv, XImage *fb) { main_blue_mask = fb->blue_mask; if (scaling) { - width = (int) (width * scale_fac); - height = (int) (height * scale_fac); + double eps = 0.000001; + width = (int) (width * scale_fac + eps); + height = (int) (height * scale_fac + eps); scaled_x = width; scaled_y = height; rfb_bytes_per_line = (main_bytes_per_line / fb->width) * width; + rfbLog("scaling screen: %dx%d -> %dx%d scale_fac=%.5f\n", + fb->width, fb->height, scaled_x, scaled_y, scale_fac); } else { rfb_bytes_per_line = main_bytes_per_line; } @@ -4643,18 +4647,17 @@ static void hint_updates(void) { #define FLOOR(x) ( (double) ((int) (x)) ) /* - * Scaling. + * Scaling: * * For shrinking, a destination (scaled) pixel will correspond to more - * than one source (i.e. main fb) pixel. Think of an x-y plane made - * with graph paper. Each square in the graph paper (i.e. collection - * of points (x,y) such that N < x < N+1 and M < y < M+1, N and M - * integers) corresponds to one pixel in the unscaled fb. There is a - * solid color filling the inside such a square. A scaled pixel has - * width 1/scale_fac, e.g. for "-scale 3/4" the width of the scaled - * pixel is 1.333. The area of this scaled pixel is 1.333 * 1.333 - * (so it obviously overlaps more than one source pixel, each which - * have area 1). + * than one source (i.e. main fb) pixel. Think of an x-y plane made with + * graph paper. Each unit square in the graph paper (i.e. collection of + * points (x,y) such that N < x < N+1 and M < y < M+1, N and M integers) + * corresponds to one pixel in the unscaled fb. There is a solid + * color filling the inside of such a square. A scaled pixel has width + * 1/scale_fac, e.g. for "-scale 3/4" the width of the scaled pixel + * is 1.333. The area of this scaled pixel is 1.333 * 1.333 (so it + * obviously overlaps more than one source pixel, each which have area 1). * * We take the weight an unscaled pixel (source) contributes to a * scaled pixel (destination) as simply proportional to the overlap area @@ -4662,65 +4665,103 @@ static void hint_updates(void) { * pixel as an integral over the portion of the graph paper it covers. * The thing being integrated is the color value of the unscaled source. * That color value is constant over a graph paper square (source pixel), - * and changes discontinuously from one square to the next. + * and changes discontinuously from one unit square to the next. + * + +Here is an example for -scale 3/4, the solid lines are the source pixels +(graph paper unit squares), while the dotted lines denote the scaled +pixels (destination pixels): + + 0 1 4/3 2 8/3 3 4=12/3 + |---------|--.------|------.--|---------|. + | | . | . | |. + | A | . B | . | |. + | | . | . | |. + | | . | . | |. + 1 |---------|--.------|------.--|---------|. + 4/3|.........|.........|.........|.........|. + | | . | . | |. + | C | . D | . | |. + | | . | . | |. + 2 |---------|--.------|------.--|---------|. + | | . | . | |. + | | . | . | |. + 8/3|.........|.........|.........|.........|. + | | . | . | |. + 3 |---------|--.------|------.--|---------|. + +So we see the first scaled pixel (0 < x < 4/3 and 0 < y < 4/3) mostly +overlaps with unscaled source pixel "A". The integration (averaging) +weights for this scaled pixel are: + + A 1 + B 1/3 + C 1/3 + D 1/9 + * * The Red, Green, and Blue color values must be averaged over separately - * otherwise you can get a complete mess (except in solid regions). + * otherwise you can get a complete mess (except in solid regions), + * because high order bits are averaged differently from the low order bits. * * So the algorithm is roughly: * * - Given as input a rectangle in the unscaled source fb with changes, - * find the rectangle of pixels this affects in the scaled destination - * fb. + * find the rectangle of pixels this affects in the scaled destination fb. * - * - For each of the affected scaled pixels, determine all of the - * unscaled pixels it overlaps with. + * - For each of the affected scaled (dest) pixels, determine all of the + * unscaled (source) pixels it overlaps with. * - * - Average those unscaled values together, weighted by the area + * - Average those unscaled source values together, weighted by the area * overlap with the destination pixel. Average R, G, B separately. * * - Take this average value and convert to a valid pixel value if * necessary (e.g. rounding, shifting), and then insert it into the * destination framebuffer as the pixel value. * + * - On to the next destination pixel... + * * ======================================================================== * - * For expanding (which we don't think people will do very often... or - * at least so we hope, the framebuffer can become huge) the situation - * is reversed and the destination pixel is smaller than a "graph paper" - * square (source pixel). Some destination pixels will be completely - * within a single unscaled source pixel. + * For expanding, e.g. -scale 1.1 (which we don't think people will do + * very often... or at least so we hope, the framebuffer can become huge) + * the situation is reversed and the destination pixel is smaller than a + * "graph paper" unit square (source pixel). Some destination pixels + * will be completely within a single unscaled source pixel. * * What we do here is a simple 4 point interpolation scheme: * * Let P00 be the source pixel closest to the destination pixel but with * x and y values less than or equal to those of the destination pixel. - * It is the source pixel immediately to the upper left of the destination + * (for simplicity, think of the upper left corner of a pixel defining the + * x,y location of the pixel, the center would work just as well). So it + * is the source pixel immediately to the upper left of the destination * pixel. Let P10 be the source pixel one to the right of P00. Let P01 * be one down from P00. And let P11 be one down and one to the right * of P00. They form a 2x2 square we will interpolate inside of. * * Let V00, V10, V01, and V11 be the color values of those 4 source - * pixels. Let dx be the distance along x the destination pixel is from - * P00. Note: 0 <= dx < 1. Similarly let dy be the distance along y. - * The weighted average is: + * pixels. Let dx be the displacement along x the destination pixel is + * from P00. Note: 0 <= dx < 1 by definition of P00. Similarly let + * dy be the displacement along y. The weighted average for the + * interpolation is: * - * Vave = V00 * (1 - dx) * (1 - dy) - * + V10 * dx * (1 - dy) - * + V01 * (1 - dx) * dy - * + V11 * dx * dy + * V_ave = V00 * (1 - dx) * (1 - dy) + * + V10 * dx * (1 - dy) + * + V01 * (1 - dx) * dy + * + V11 * dx * dy * - * Note that the weights (1-dx)*(1-dy) + dx(1-dy) + (1-dx)*dy + dx*dy - * automatically add up to 1. It is also nice that all the weights - * are positive. The above formula can be motivated by doing two 1D - * interpolations along x: + * Note that the weights (1-dx)*(1-dy) + dx*(1-dy) + (1-dx)*dy + dx*dy + * automatically add up to 1. It is also nice that all the weights are + * positive (unsigned char stays unsigned char). The above formula can + * be motivated by doing two 1D interpolations along x: * * VA = V00 * (1 - dx) + V10 * dx * VB = V01 * (1 - dx) + V11 * dx * * and then interpolating VA and VB along y: * - * Vave = VA * (1 - dy) + VB * dy + * V_ave = VA * (1 - dy) + VB * dy * * VA * v |<-dx->| @@ -4729,15 +4770,20 @@ static void hint_updates(void) { * -- | o...|... "o" denotes the position of the desired * ^ | . | . destination pixel relative to the P00 * | . | . source pixel. - * V10 ------ V11 . + * V10 ----.- V11 . * ........ + * | * VB * * - * Of course R, G, B averages are done separately. This gives reasonable - * results. I believe this is called bilinear scaling. + * Of course R, G, B averages are done separately as in the shrinking + * case. This gives reasonable results, and the implementation for + * shrinking can simply be used with different choices for weights for + * the loop over the 4 pixels. */ +#define FPTYPE double + static void scale_and_mark_rect(int X1, int Y1, int X2, int Y2) { /* * Notation: @@ -4753,23 +4799,22 @@ static void scale_and_mark_rect(int X1, int Y1, int X2, int Y2) { int i, j, i1, i2, j1, j2; /* indices for scaled fb (dest) */ int I, J, I1, I2, J1, J2; /* indices for main fb (source) */ - double w, wx, wy, wtot; /* pixel weights */ + FPTYPE w, wx, wy, wtot; /* pixel weights */ - double x1, y1, x2, y2; /* x-y coords for destination pixels edges */ - double dx, dy; /* size of destination pixel */ + FPTYPE x1, y1, x2, y2; /* x-y coords for destination pixels edges */ + FPTYPE dx, dy; /* size of destination pixel */ - double ddx, ddy; /* for interpolation expansion */ + FPTYPE ddx, ddy; /* for interpolation expansion */ char *src, *dest; /* pointers to the two framebuffers */ - double pixave[4]; /* for averaging pixel values */ + FPTYPE pixave[4]; /* for averaging pixel values */ - unsigned char uc; /* tmp pixel data holders */ unsigned short us; int shrink; /* whether shrinking or expanding */ + static int constant_weights = -1; - int pseudocolor = 0; /* true if PseudoColor... */ if (scale_fac <= 1.0) { shrink = 1; } else { @@ -4777,7 +4822,10 @@ static void scale_and_mark_rect(int X1, int Y1, int X2, int Y2) { } if (! screen->rfbServerFormat.trueColour) { - pseudocolor = 1; + /* + * PseudoColor colormap... blending leads to random colors. + */ + scaling_noblend = 1; } Bpp = bpp/8; /* Bytes per pixel */ @@ -4793,8 +4841,8 @@ static void scale_and_mark_rect(int X1, int Y1, int X2, int Y2) { * both are > 1 (e.g. 1.333 for -scale 3/4) * they should also be equal but we don't assume it. */ - dx = (double) Nx / nx; - dy = (double) Ny / ny; + dx = (FPTYPE) Nx / nx; + dy = (FPTYPE) Ny / ny; /* * find the extent of the change the input rectangle induces in @@ -4807,11 +4855,11 @@ static void scale_and_mark_rect(int X1, int Y1, int X2, int Y2) { /* Right edges: find smallest i such that (i+1) * dx >= X2+1 */ i2 = CEIL( (X2+1)/dx ) - 1; - /* to be safe, correct any overflows: */ + /* To be safe, correct any overflows: */ i1 = nfix(i1, nx); i2 = nfix(i2, nx) + 1; /* add 1 to make a rectangle upper boundary */ - /* repeat above for y direction: */ + /* Repeat above for y direction: */ j1 = FLOOR(Y1/dy); j2 = CEIL( (Y2+1)/dy ) - 1; @@ -4819,21 +4867,55 @@ static void scale_and_mark_rect(int X1, int Y1, int X2, int Y2) { j2 = nfix(j2, ny) + 1; /* - * loop over destination pixels in scaled fb: + * There is some speedup if the pixel weights are constant, so + * let's special case these. + * + * If scale = 1/n and n divides Nx and Ny, the pixel weights + * are constant. + */ + if (constant_weights < 0) { + int n = 0; + constant_weights = 0; + + for (i = 2; i<=128; i++) { + double test = ((double) 1)/ i; + double diff, eps = 1.0e-9; + diff = scale_fac - test; + if (-eps < diff && diff < eps) { + n = i; + break; + } + } + if (n != 0) { + if (! scaling_noblend && Nx % n == 0 && Ny % n == 0) { + rfbLog("scale_and_mark_rect: using constant " + "pixel weight speedup for 1/%d\n", n); + constant_weights = 1; + } + } + } + /* set these all to 1.0 to begin with */ + wx = 1.0; + wy = 1.0; + w = 1.0; + + /* + * Loop over destination pixels in scaled fb: */ for (j=j1; j= Nx) I1 = Nx - 1; - if (!shrink) { + if (shrink) { + I2 = (int) CEIL(x2) - 1; + if (I2 >= Nx) I2 = Nx - 1; + } else { I2 = I1 + 1; /* simple interpolation */ + ddx = x1 - I1; } - /* zero out accumulators for next pixel average: */ + /* Zero out accumulators for next pixel average: */ for (b=0; b<4; b++) { pixave[b] = 0.0; /* for RGB weighted sums */ } /* * wtot is for accumulating the total weight. - * It should always be 1/(scale_fac * scale_fac), - * but we don't assume that. + * It should always sum to 1/(scale_fac * scale_fac). */ wtot = 0.0; - if (!shrink) { - /* interpolation distances, see diagram above */ - ddx = x1 - I1; - ddy = y1 - J1; - } - /* - * loop over source pixels covered by this dest pixel: + * Loop over source pixels covered by this dest pixel. + * + * These "extra" loops over "J" and "I" make + * the cache/cacheline performance unclear. + * For example, will the data brought in from + * src for j, i, and J=0 still be in the cache + * after the J > 0 data have been accessed and + * we are at j, i+1, J=0? The stride in J is + * main_bytes_per_line, and so ~4 KB. */ for (J=J1; J<=J2; J++) { /* see comments for I, x1, x2, etc. below */ - if (pseudocolor) { + if (constant_weights) { + ; + } else if (scaling_noblend) { if (J != J1) { continue; } @@ -4886,7 +4972,7 @@ static void scale_and_mark_rect(int X1, int Y1, int X2, int Y2) { /* interpolation scheme: */ } else if (!shrink) { if (J >= Ny) { - continue; /* off edge */ + continue; } else if (J == J1) { wy = 1.0 - ddy; } else if (J != J1) { @@ -4908,11 +4994,15 @@ static void scale_and_mark_rect(int X1, int Y1, int X2, int Y2) { /* Work out the weight: */ - if (pseudocolor) { + if (constant_weights) { + ; + } else if (scaling_noblend) { /* - * ugh, colormap is bad news, to - * avoid random colors just take - * the first pixel. + * Ugh, PseudoColor colormap is + * bad news, to avoid random + * colors just take the first + * pixel. Or user may have + * specified :nb to fraction. */ if (I != I1) { continue; @@ -4958,22 +5048,22 @@ static void scale_and_mark_rect(int X1, int Y1, int X2, int Y2) { /* - * we average the unsigned char value + * We average the unsigned char value * instead of char value: otherwise * the minimum (char 0) is right next * to the maximum (char -1)! This way * they are spread between 0 and 255. */ - if (Bpp == 4 || Bpp == 1) { - + if (Bpp != 2) { for (b=0; b