This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project gfxprim.git.
The branch, master has been updated via 8dafb4ba1c30fc36d48f86e166212af3b7cf329c (commit) via 763bca3b9ac509c837ca940dcede58e58b86d21b (commit) from 02901b7585b1265cedf68d019e7eb8ec5263c5a7 (commit)
Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below.
- Log ----------------------------------------------------------------- http://repo.or.cz/w/gfxprim.git/commit/8dafb4ba1c30fc36d48f86e166212af3b7cf3...
commit 8dafb4ba1c30fc36d48f86e166212af3b7cf329c Author: Cyril Hrubis metan@ucw.cz Date: Sat Feb 11 21:39:59 2012 +0100
core: Much faster blit for most of the cases.
When src and dst have the same pixel type and orientation, it's a simple and fast memcpy in most cases.
When src and dst have the same orientation, it's an optimized conversion loop that is more than 50% faster than the unspecialized one even on small bitmaps, and about 5 times faster on a fullscreen image blit.
Ultimate TODO: faster rotated blits.
diff --git a/include/core/GP_Blit.gen.h.t b/include/core/GP_Blit.gen.h.t deleted file mode 100644 index cc1cc6e..0000000 --- a/include/core/GP_Blit.gen.h.t +++ /dev/null @@ -1,16 +0,0 @@ -%% extends "base.h.t" - -{% block descr %}Specialized blit functions and macros.{% endblock %} - -%% block body - -%% for ps in pixelsizes -/*** Blit preserving type for {{ ps.suffix }} *** - * Assumes the contexts to be of the right types and sizes - * Ignores transformations and clipping */ -void GP_Blit_{{ ps.suffix }}(const GP_Context *c1, GP_Coord x1, GP_Coord y1, GP_Size w, GP_Size h, - GP_Context *c2, GP_Coord x2, GP_Coord y2); - -%% endfor - -%% endblock body diff --git a/include/core/GP_Blit.h b/include/core/GP_Blit.h index 152df72..6c8ad84 100644 --- a/include/core/GP_Blit.h +++ b/include/core/GP_Blit.h @@ -39,9 +39,6 @@ #ifndef CORE_GP_BLIT_H #define CORE_GP_BLIT_H
-/* Generated header */ -#include "GP_Blit.gen.h" - /* * Blits rectangle from src defined by x0, y0, x1, y1 (x1, y1 included) to dst * starting on x2, y2. @@ -75,6 +72,13 @@ void GP_BlitXYXY_Raw(const GP_Context *src, GP_Context *dst, GP_Coord x2, GP_Coord y2);
/* + * Naive get/put pixel implementation, used when everything else fails. + */ +void GP_BlitXYXY_Naive_Raw(const GP_Context *src, + GP_Coord x0, GP_Coord y0, GP_Coord x1, GP_Coord y1, + GP_Context *dst, GP_Coord x2, GP_Coord y2); + +/* * Same as GP_BlitXYWH but doesn't respect rotations. Faster (for now). */ void GP_BlitXYWH_Raw(const GP_Context *src, @@ -92,4 +96,6 @@ static inline void GP_Blit_Raw(const GP_Context *src, GP_BlitXYWH_Raw(src, x0, y0, w0, h0, dst, x1, y1); }
+ + #endif /* CORE_GP_BLIT_H */ diff --git a/include/core/GP_Context.h b/include/core/GP_Context.h index d91e54d..a6b76df 100644 --- a/include/core/GP_Context.h +++ b/include/core/GP_Context.h @@ -19,7 +19,7 @@ * Copyright (C) 2009-2011 Jiri "BlueBear" Dluhos * * jiri.bluebear.dluhos@gmail.com * * * - * Copyright (C) 2009-2011 Cyril Hrubis metan@ucw.cz * + * Copyright (C) 2009-2012 Cyril Hrubis metan@ucw.cz * * * *****************************************************************************/
@@ -87,6 +87,14 @@ typedef struct GP_Context { || (y) < 0 || y >= (typeof(y)) context->h) /* + * Check for exactly same rotation flags. + */ +#define GP_CONTEXT_ROTATION_EQUAL(c1, c2) + ((c1)->axes_swap == (c2)->axes_swap && + (c1)->x_swap == (c2)->x_swap && + (c1)->y_swap == (c2)->y_swap) + +/* * Allocate context. */ GP_Context *GP_ContextAlloc(GP_Size w, GP_Size h, GP_PixelType type); diff --git a/include/core/Makefile b/include/core/Makefile index c9be7d8..bf82884 100644 --- a/include/core/Makefile +++ b/include/core/Makefile @@ -1,5 +1,5 @@ TOPDIR=../.. -GENHEADERS=GP_Convert_Scale.gen.h GP_Blit.gen.h GP_Pixel.gen.h +GENHEADERS=GP_Convert_Scale.gen.h GP_Pixel.gen.h GP_GetPutPixel.gen.h GP_Convert.gen.h GP_FnPerBpp.gen.h GP_MixPixels.gen.h GP_GammaCorrection.gen.h LIBNAME=core diff --git a/libs/core/GP_Blit.c b/libs/core/GP_Blit.c index e7576d9..a31ef78 100644 --- a/libs/core/GP_Blit.c +++ b/libs/core/GP_Blit.c @@ -26,28 +26,18 @@ #include "GP_Context.h" #include "GP_Convert.h" #include "GP_Blit.h" +#include "GP_FnPerBpp.h" + +/* Generated function */ +void GP_BlitXYXY_Raw_Fast(const GP_Context *src, + GP_Coord x0, GP_Coord y0, GP_Coord x1, GP_Coord y1, + GP_Context *dst, GP_Coord x2, GP_Coord y2); +
void GP_BlitXYXY_Naive(const GP_Context *src, GP_Coord x0, GP_Coord y0, GP_Coord x1, GP_Coord y1, GP_Context *dst, GP_Coord x2, GP_Coord y2) { - /* Normalize source rectangle */ - if (x1 < x0) - GP_SWAP(x0, x1); - - if (y1 < y0) - GP_SWAP(y0, y1); - - /* All coordinates are inside of src the context */ - GP_CHECK(x0 < (GP_Coord)GP_ContextW(src)); - GP_CHECK(y0 < (GP_Coord)GP_ContextH(src)); - GP_CHECK(x1 < (GP_Coord)GP_ContextW(src)); - GP_CHECK(y1 < (GP_Coord)GP_ContextH(src)); - - /* Destination is big enough */ - GP_CHECK(x2 + (x1 - x0) < (GP_Coord)GP_ContextW(dst)); - GP_CHECK(y2 + (y1 - y0) < (GP_Coord)GP_ContextH(dst)); - GP_Coord x, y;
for (y = y0; y <= y1; y++) @@ -65,7 +55,28 @@ void GP_BlitXYXY(const GP_Context *src, GP_Coord x0, GP_Coord y0, GP_Coord x1, GP_Coord y1, GP_Context *dst, GP_Coord x2, GP_Coord y2) { - //TODO + /* Normalize source rectangle */ + if (x1 < x0) + GP_SWAP(x0, x1); + + if (y1 < y0) + GP_SWAP(y0, y1); + + /* All coordinates are inside of src the context */ + GP_CHECK(x0 < (GP_Coord)GP_ContextW(src)); + GP_CHECK(y0 < (GP_Coord)GP_ContextH(src)); + GP_CHECK(x1 < (GP_Coord)GP_ContextW(src)); + GP_CHECK(y1 < (GP_Coord)GP_ContextH(src)); + + /* Destination is big enough */ + GP_CHECK(x2 + (x1 - x0) < (GP_Coord)GP_ContextW(dst)); + GP_CHECK(y2 + (y1 - y0) < (GP_Coord)GP_ContextH(dst)); + + if (GP_CONTEXT_ROTATION_EQUAL(src, dst)) { + GP_BlitXYXY_Raw_Fast(src, x0, y0, x1, y1, dst, x2, y2); + return; + } + GP_BlitXYXY_Naive(src, x0, y0, x1, y1, dst, x2, y2); }
@@ -79,10 +90,9 @@ void GP_BlitXYWH(const GP_Context *src, GP_BlitXYXY(src, x0, y0, x0 + w0 - 1, y0 + h0 - 1, dst, x1, y1); }
- -void GP_BlitXYXY_Naive_Raw(const GP_Context *src, - GP_Coord x0, GP_Coord y0, GP_Coord x1, GP_Coord y1, - GP_Context *dst, GP_Coord x2, GP_Coord y2) +void GP_BlitXYXY_Raw(const GP_Context *src, + GP_Coord x0, GP_Coord y0, GP_Coord x1, GP_Coord y1, + GP_Context *dst, GP_Coord x2, GP_Coord y2) { /* Normalize source rectangle */ if (x1 < x0) @@ -101,24 +111,7 @@ void GP_BlitXYXY_Naive_Raw(const GP_Context *src, GP_CHECK(x2 + (x1 - x0) < (GP_Coord)dst->w); GP_CHECK(y2 + (y1 - y0) < (GP_Coord)dst->h);
- GP_Coord x, y; - - for (y = y0; y <= y1; y++) - for (x = x0; x <= x1; x++) { - GP_Pixel p = GP_GetPixel_Raw(src, x, y); - - if (src->pixel_type != dst->pixel_type) - p = GP_ConvertContextPixel(p, src, dst); - - GP_PutPixel_Raw(dst, x2 + (x - x0), y2 + (y - y0), p); - } -} - -void GP_BlitXYXY_Raw(const GP_Context *src, - GP_Coord x0, GP_Coord y0, GP_Coord x1, GP_Coord y1, - GP_Context *dst, GP_Coord x2, GP_Coord y2) -{ - GP_BlitXYXY_Naive_Raw(src, x0, y0, x1, y1, dst, x2, y2); + GP_BlitXYXY_Raw_Fast(src, x0, y0, x1, y1, dst, x2, y2); }
void GP_BlitXYWH_Raw(const GP_Context *src, diff --git a/libs/core/GP_Blit.gen.c.t b/libs/core/GP_Blit.gen.c.t index 2259956..9d5c160 100644 --- a/libs/core/GP_Blit.gen.c.t +++ b/libs/core/GP_Blit.gen.c.t @@ -3,72 +3,178 @@ {% block descr %}Specialized blit functions and macros.{% endblock %}
%% block body -#include <stdio.h> #include <string.h> -#include "GP_Pixel.h" -#include "GP_GetPutPixel.h" -#include "GP_Context.h" -#include "GP_Blit.h"
-%% for ps in pixelsizes -/*** Blit preservimg PixelType, variant for {{ ps.suffix }} ***/ -void GP_Blit_{{ ps.suffix }}(const GP_Context *c1, GP_Coord x1, GP_Coord y1, GP_Size w, GP_Size h, - GP_Context *c2, GP_Coord x2, GP_Coord y2) +#include "core/GP_Pixel.h" +#include "core/GP_GetPutPixel.h" +#include "core/GP_Context.h" +#include "core/GP_Blit.h" +#include "core/GP_Debug.h" +#include "core/GP_Convert.h" +#include "core/GP_Convert.gen.h" +#include "core/GP_Convert_Scale.gen.h" + +/* + * TODO: this is used for same pixel but different offset, could still be optimized + */ +void blitXYXY_Naive_Raw(const GP_Context *src, + GP_Coord x0, GP_Coord y0, GP_Coord x1, GP_Coord y1, + GP_Context *dst, GP_Coord x2, GP_Coord y2) { - if (unlikely(w == 0 || h == 0)) return; + GP_Coord x, y; + + for (y = y0; y <= y1; y++) { + for (x = x0; x <= x1; x++) { + GP_Pixel p = GP_GetPixel_Raw(src, x, y); + + if (src->pixel_type != dst->pixel_type) + p = GP_ConvertContextPixel(p, src, dst); + + GP_PutPixel_Raw(dst, x2 + (x - x0), y2 + (y - y0), p); + }
+ } +} + +%% for ps in pixelsizes +/* + * Blit for equal pixel types {{ ps.suffix }} + */ +void GP_BlitXYXY_Raw_{{ ps.suffix }}(const GP_Context *src, + GP_Coord x0, GP_Coord y0, GP_Coord x1, GP_Coord y1, + GP_Context *dst, GP_Coord x2, GP_Coord y2) +{ /* Special case - copy whole line-block with one memcpy() */ - if ((x1 == 0) && (x2 == 0) && (w == c1->w) && (c1->w == c2->w) && - (c1->bytes_per_row == c2->bytes_per_row)) { - memcpy(c2->pixels + c2->bytes_per_row * y2, - c1->pixels + c1->bytes_per_row * y1, - c1->bytes_per_row * h); + if ((x0 == 0) && (x2 == 0) && (x1 == (GP_Coord)src->w - 1) && + (src->w == dst->w) && + (src->bytes_per_row == dst->bytes_per_row)) { + + memcpy(dst->pixels + dst->bytes_per_row * y2, + src->pixels + src->bytes_per_row * y0, + src->bytes_per_row * y1); return; }
%% if not ps.needs_bit_endian() /* General case - memcpy() each horizontal line */ - GP_Size i; + GP_Coord y;
- for (i = 0; i < h; i++) - memcpy(GP_PIXEL_ADDR_{{ ps.suffix }}(c2, x2, y2 + i), - GP_PIXEL_ADDR_{{ ps.suffix }}(c1, x2, y2 + i), - {{ ps.size/8 }} * w); + for (y = y0; y <= y1; y++) + memcpy(GP_PIXEL_ADDR_{{ ps.suffix }}(dst, x2, y2 + y), + GP_PIXEL_ADDR_{{ ps.suffix }}(src, x0, y0 + y), + {{ int(ps.size/8) }} * (x1 - x0 + 1)); %% else /* Rectangles may not be bit-aligned in the same way! */ /* Alignment (index) of first bits in the first byte */ - int al1 = GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x1); + //TODO: This is wrong for subcontexts where the offset + // needs to be summed with context->offset and moduled + int al1 = GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x0); int al2 = GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x2); /* Special case of the same alignment and width >=2 bytes */ - if ((al1 == al2) && (w * {{ ps.size }} >= 16)) { + if ((al1 == al2) && ((x1 - x0 + 1) * {{ ps.size }} >= 16)) { /* Number of bits in the last partial byte */ - int end_al = GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x1 + w); - GP_ASSERT(({{ ps.size }} * w - al1 - end_al) % 8 == 0); - int copy_size = ({{ ps.size }} * w - al1 - end_al) / 8; + int end_al = GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x1); + GP_ASSERT(({{ ps.size }} * (x1 - x0 + 1) - al1 - end_al) % 8 == 0); + int copy_size = ({{ ps.size }} * (x1 - x0 + 1) - al1 - end_al) / 8; /* First and last byte incident to the line */ - uint8_t *p1 = (uint8_t *) GP_PIXEL_ADDR_{{ ps.suffix }}(c1, x1, y1); - uint8_t *p2 = (uint8_t *) GP_PIXEL_ADDR_{{ ps.suffix }}(c2, x2, y2); - uint8_t *end_p1 = (uint8_t *) GP_PIXEL_ADDR_{{ ps.suffix }}(c1, x1 + w - 1, y1); - uint8_t *end_p2 = (uint8_t *) GP_PIXEL_ADDR_{{ ps.suffix }}(c2, x2 + w - 1, y2); + uint8_t *p1 = (uint8_t *) GP_PIXEL_ADDR_{{ ps.suffix }}(src, x1, y1); + uint8_t *p2 = (uint8_t *) GP_PIXEL_ADDR_{{ ps.suffix }}(dst, x2, y2); + uint8_t *end_p1 = (uint8_t *) GP_PIXEL_ADDR_{{ ps.suffix }}(src, x1, y0); + uint8_t *end_p2 = (uint8_t *) GP_PIXEL_ADDR_{{ ps.suffix }}(dst, x2, y2); - GP_Size i; + GP_Coord i;
- for (i = 0; i < h; i++) { + for (i = 0; i < (y1 - y0 + 1); i++) { if (al1 != 0) GP_SET_BITS(al1, 8-al1, *p2, GP_GET_BITS(al1, 8-al1, *p1)); memcpy(p2+(al1!=0), p1+(al1!=0), copy_size); if (end_al != 0) GP_SET_BITS(0, end_al, *end_p2, GP_GET_BITS(0, end_al, *end_p1)); - p1 += c1->bytes_per_row; - end_p1 += c1->bytes_per_row; - p2 += c2->bytes_per_row; - end_p2 += c2->bytes_per_row; + p1 += src->bytes_per_row; + end_p1 += src->bytes_per_row; + p2 += dst->bytes_per_row; + end_p2 += dst->bytes_per_row; } - }// else /* Different bit-alignment, can't use memcpy() */ - // GP_Blit_Naive(c1, x1, y1, w, h, c2, x2, y2); + } else /* Different bit-alignment, can't use memcpy() */ + blitXYXY_Naive_Raw(src, x0, y0, x1, y1, dst, x2, y2); %% endif }
+%% endfor + +/* + * Generate Naive Blits, I know this is n^2 variants but the gain is in speed is + * more than 50% and the size footprint for two for cycles is really small. + */ +%% for src in pixeltypes +%% if not src.is_unknown() and not src.is_palette() +%% for dst in pixeltypes +%% if not dst.is_unknown() and not dst.is_palette() +%% if dst.name != src.name +/* + * Blits {{ src.name }} to {{ dst.name }} + */ +static void blitXYXY_Naive_Raw_{{ src.name }}_{{ dst.name }}(const GP_Context *src, + GP_Coord x0, GP_Coord y0, GP_Coord x1, GP_Coord y1, + GP_Context *dst, GP_Coord x2, GP_Coord y2) +{ + GP_Coord x, y; + + for (y = y0; y <= y1; y++) + for (x = x0; x <= x1; x++) { + GP_Pixel p1, p2 = 0; + p1 = GP_GetPixel_Raw_{{ src.pixelsize.suffix }}(src, x, y); + GP_Pixel_{{ src.name }}_TO_RGB888(p1, p2); + GP_Pixel_RGB888_TO_{{ dst.name }}(p2, p1); + GP_PutPixel_Raw_{{ dst.pixelsize.suffix }}(dst, x2 + (x - x0), y2 + (y - y0), p1); + } +} + +%% endif +%% endif +%% endfor +%% endif +%% endfor + +void GP_BlitXYXY_Raw_Fast(const GP_Context *src, + GP_Coord x0, GP_Coord y0, GP_Coord x1, GP_Coord y1, + GP_Context *dst, GP_Coord x2, GP_Coord y2) +{ + /* Same pixel type, could be (mostly) optimized to memcpy() */ + if (src->pixel_type == dst->pixel_type) { + GP_FN_PER_BPP(GP_BlitXYXY_Raw, src->bpp, src->bit_endian, + src, x0, y0, x1, y1, dst, x2, y2); + return; + }
+ /* Specialized naive functions */ + switch (src->pixel_type) { +%% for src in pixeltypes +%% if not src.is_unknown() and not src.is_palette() + case GP_PIXEL_{{ src.name }}: + switch (dst->pixel_type) { +%% for dst in pixeltypes +%% if not dst.is_unknown() and not dst.is_palette() +%% if dst.name != src.name + case GP_PIXEL_{{ dst.name }}: + blitXYXY_Naive_Raw_{{ src.name }}_{{ dst.name }}(src, x0, y0, + x1, y1, + dst, x2, y2); + break; +%% endif +%% endif +%% endfor + default: + GP_ABORT("Invalid destination pixel %s", + GP_PixelTypeName(dst->pixel_type)); + } + break; +%% endif %% endfor + default: + GP_ABORT("Invalid source pixel %s", + GP_PixelTypeName(src->pixel_type)); + } +} + %% endblock body
http://repo.or.cz/w/gfxprim.git/commit/763bca3b9ac509c837ca940dcede58e58b86d...
commit 763bca3b9ac509c837ca940dcede58e58b86d21b Author: Cyril Hrubis metan@ucw.cz Date: Sat Feb 11 21:34:50 2012 +0100
demos: fbshow add timer for blit measuring.
diff --git a/demos/fbshow/fbshow.c b/demos/fbshow/fbshow.c index 5e3e06d..83bed96 100644 --- a/demos/fbshow/fbshow.c +++ b/demos/fbshow/fbshow.c @@ -211,7 +211,9 @@ static void *image_loader(void *ptr) uint32_t cx = (context->w - ret->w)/2; uint32_t cy = (context->h - ret->h)/2;
+ cpu_timer_start(&timer, "Blitting"); GP_Blit_Raw(ret, 0, 0, ret->w, ret->h, context, cx, cy); + cpu_timer_stop(&timer); GP_ContextFree(ret);
/* clean up the rest of the display */
-----------------------------------------------------------------------
Summary of changes: demos/fbshow/fbshow.c | 2 + include/core/GP_Blit.gen.h.t | 16 ---- include/core/GP_Blit.h | 12 ++- include/core/GP_Context.h | 10 ++- include/core/Makefile | 2 +- libs/core/GP_Blit.c | 73 ++++++++--------- libs/core/GP_Blit.gen.c.t | 180 +++++++++++++++++++++++++++++++++--------- 7 files changed, 197 insertions(+), 98 deletions(-) delete mode 100644 include/core/GP_Blit.gen.h.t
repo.or.cz automatic notification. Contact project admin jiri.bluebear.dluhos@gmail.com if you want to unsubscribe, or site admin admin@repo.or.cz if you receive no reply.