[repo.or.cz] gfxprim.git branch generate updated: bf71ed549471116331486980f67e4f64b840640d - Gfxprim

6 Oct 2011

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project gfxprim.git.
The branch, generate has been updated
       via  bf71ed549471116331486980f67e4f64b840640d (commit)
       via  4d19114d20f531cd49c79158af4263ea4af98109 (commit)
      from  d291a122c53ce9d70a5777b380ae9ca82978987b (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://repo.or.cz/w/gfxprim.git/commit/bf71ed549471116331486980f67e4f64b8406...
commit bf71ed549471116331486980f67e4f64b840640d
Author: Cyril Hrubis metan@ucw.cz
Date:   Thu Oct 6 19:28:37 2011 +0200
Attempt to fix the aligned pixel access.
* The Get/PutPixel is now full of special cases
Beware that the code wasn't tested to work yet.

diff --git a/include/core/GP_Common.h b/include/core/GP_Common.h
index 23e5527..cae989e 100644
--- a/include/core/GP_Common.h
+++ b/include/core/GP_Common.h
@@ -140,51 +140,6 @@
    a = tmp;            } while (0)
-/*
- * Helper macros to read/write parts of words 
- *
- * Return (shifted) count bits at offset of value
- * Note: operates with value types same as val 
- */
-#define GP_GET_BITS(offset, count, val) \
-	( ( (val)>>(offset) ) & ( ((((typeof(val))1)<<(count)) - 1) ) )
-
-/*
- * Debugging version, evaluates args twice.
- */
-#define GP_GET_BITS_DBG(offset, count, val) \
-	( printf("GET_BITS(%d, %d, 0x%x)=%d", offset, count, val, \
-	GP_GET_BITS(offset, count, val)), GP_GET_BITS(offset, count, val))
-
-/*
- * Set count bits of dest at offset to val (shifted by offset)
- * 
- * Does not check val for overflow
- * Operates on 8, 16, and 32 bit values, depending on the type of dest, 
- * this should be unsigned
- *
- * GP_SET_BITS_OR only sets (|=) the bits, assuming these are clear beforehand
- * GP_CLEAR_BITS sets the target bits to zero
- * GP_SET_BITS does both
- */
-#define GP_CLEAR_BITS(offset, count, dest) \
-	( (dest) &= ~(((((typeof(dest))1) << (count)) - 1) << (offset)) )
-
-#define GP_SET_BITS_OR(offset, dest, val) ( (dest) |= ((val)<<(offset)) )
-
-#define GP_SET_BITS(offset, count, dest, val) do { \
-		GP_CLEAR_BITS(offset, count, dest); \
-		GP_SET_BITS_OR(offset, dest, val); \
-} while (0)
-
-/* 
- * Debugging version, evaluates args twice.
- */
-#define GP_SET_BITS_DBG(offset, count, dest, val) do { \
-	GP_SET_BITS(offset, count, dest, val); \
-	printf("SET_BITS(%d, %d, p, %d)\n", offset, count, val); \
-} while (0)
-
/* Determines the sign of the integer value; it is +1 if value is positive,
  * -1 if negative, and 0 if it is zero.
diff --git a/include/core/GP_Convert.gen.h.t b/include/core/GP_Convert.gen.h.t
index 56759ee..760eb74 100644
--- a/include/core/GP_Convert.gen.h.t
+++ b/include/core/GP_Convert.gen.h.t
@@ -45,7 +45,7 @@
%% block body
-#include "GP_Common.h"
+#include "GP_GetSetBits.h"
 #include "GP_Context.h"
 #include "GP_Pixel.h"
diff --git a/include/core/GP_GetPutPixel.gen.h.t b/include/core/GP_GetPutPixel.gen.h.t
index ee1ea90..2a8529b 100644
--- a/include/core/GP_GetPutPixel.gen.h.t
+++ b/include/core/GP_GetPutPixel.gen.h.t
@@ -7,7 +7,49 @@ Do not include directly, use GP_Pixel.h
%% block body
-#include "GP_Common.h"
+
+ /*
+
+   Note about byte alignment
+   ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+   Unaligned access happens when an instruction that works with a multi-byte
+   value gets an address that is not divisible by the size of the value, e.g.
+   when a 32 bit integer instruction gets an address that is not a multiple
+   of 4. On Intel CPUs this type of access works and is supported, however the
+   C standard defines it as undefined behavior. It fails to work on ARM and on
+   most other non-Intel CPUs, so some more trickery must be done in order to
+   write unaligned multi-byte values. First of all we must compute the offset
+   and the number of bytes to be accessed (which is crucial for speed, as we
+   are going to read the pixel value byte by byte).
+
+   The offsets (starting with the first one, i.e. pixel_size mod 8) form a
+   subgroup of the cyclic group mod 8. The maximal count of bits, counted from
+   the start of the byte, is then the maximum of this subgroup + pixel_size.
+   If this number is less than or equal to 8 * N, we can write such a pixel by
+   writing N bytes.
+
+   For example, the offsets of 16 BPP form the trivial subgroup {0}, so we
+   only need 2 bytes to write it. As a matter of fact, 16 and 32 BPP are
+   special cases that are always aligned, together with 8 BPP (which is
+   aligned trivially). These three are coded as special cases, which yields
+   faster operations in the case of 16 and 32 BPP. 24 BPP is not aligned, as
+   there are no instructions that operate on 3 byte long numbers.
+
+   As a second example, take the offsets of 20 BPP, which form the subgroup
+   {4, 0}, so the max + pixel_size = 24 and indeed we fit into 3 bytes.
+
+   If pixel_size is coprime to 8, the offsets generate the whole group and so
+   the max + pixel_size = 7 + pixel_size. 17 BPP fits into 24 bits, so 3 bytes
+   are needed as well. 19 BPP fits into 26 bits, and because of that 4 bytes
+   are needed.
+
+   Once we have figured out the maximal number of bytes and the offset, all
+   that remains is to fetch the first and the last byte, combine them with the
+   given pixel value and write the result back to the bitmap.
+
+ */
+
+#include "GP_GetSetBits.h"
 #include "GP_Context.h"
%% for ps in pixelsizes
@@ -44,12 +86,73 @@ Do not include directly, use GP_Pixel.h
  */
 static inline GP_Pixel GP_GetPixel_Raw_{{ ps.suffix }}(const GP_Context *c, int x, int y)
 {
-{# Special case to prevent some of the "overflow" warnings -#}
-%% if ps.size == config.pixel_size
-	return *(GP_PIXEL_ADDR_{{ ps.suffix}}(c, x, y));
+%% if ps.size == 32
+	/*
+	 * 32 BPP is expected to have aligned pixels
+	 */
+	return *((uint32_t*)GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y));
+%% elif ps.size == 16
+	/*
+	 * 16 BPP is expected to have aligned pixels
+	 */
+	return *((uint16_t*)GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y));
+%% elif ps.size == 8
+	/*
+	 * 8 BPP is byte aligned
+	 */
+	return *((uint8_t*)GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y));
+%% elif ps.size == 1 or ps.size == 2 or ps.size == 4 or ps.size == 8
+	/*
+	 * The whole pixel is always stored within a single byte
+	 *
+	 * The full list = {1, 2, 4, 8}
+	 */
+	return GP_GET_BITS1_ALIGNED(GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x), {{ ps.size }},
+		*(GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y)));
+%% elif ps.size <= 10 or ps.size == 12 or ps.size == 16
+	/*
+	 * The pixel is stored in one or two bytes
+	 *
+	 * The max from subgroup (of mod 8 factor group) generated by
+	 * pixel_size mod 8 + pixel_size <= 16
+	 *
+	 * The full list = {3, 5, 6, 7, 9, 10, 12, 16}
+	 *
+	 * Hint: If the pixel size is coprime to 8 the group is generated by
+	 *       pixel_size mod 8 and maximal size thus is pixel_size + 7
+	 */
+	return GP_GET_BITS2_ALIGNED(GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x), {{ ps.size }},
+		*(GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y)));
+%% elif ps.size <= 18 or ps.size == 20 or ps.size == 24
+	/*
+	 * The pixel is stored in two or three bytes
+	 *
+	 * The max from subgroup (of mod 8 factor group) generated by
+	 * pixel_size mod 8 + pixel_size <= 24
+	 *
+	 * The full list = {11, 13, 14, 15, 17, 18, 20, 24}
+	 *
+	 * Hint: If the pixel size is coprime to 8 the group is generated by
+	 *       pixel_size mod 8 and maximal size thus is pixel_size + 7
+	 */
+	return GP_GET_BITS3_ALIGNED(GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x), {{ ps.size }},
+		*(GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y)));
+%% elif ps.size <= 23 or ps.size == 25 or ps.size == 26 or ps.size == 28 or ps.size == 32
+	/*
+	 * The pixel is stored in three or four bytes
+	 *
+	 * The max from subgroup (of mod 8 factor group) generated by
+	 * pixel_size mod 8 + pixel_size <= 32
+	 *
+	 * The full list = {19, 21, 22, 23, 25, 26, 28, 32}
+	 *
+	 * Hint: If the pixel size is coprime to 8 the group is generated by
+	 *       pixel_size mod 8 and maximal size thus is pixel_size + 7
+	 */
+	return GP_GET_BITS4_ALIGNED(GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x), {{ ps.size }},
+		*(GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y)));
 %% else
-	return GP_GET_BITS(GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x), {{ ps.size }},
-		*(GP_PIXEL_ADDR_{{ ps.suffix}}(c, x, y)));
+	#error not implemented
 %% endif
 }
@@ -58,12 +161,73 @@ static inline GP_Pixel GP_GetPixel_Raw_{{ ps.suffix }}(const GP_Context *c, int
  */
 static inline void GP_PutPixel_Raw_{{ ps.suffix }}(GP_Context *c, int x, int y, GP_Pixel p)
 {
-{# Special case to prevent some of the "overflow" warnings -#}
-%% if ps.size == config.pixel_size
-	*(GP_PIXEL_ADDR_{{ ps.suffix}}(c, x, y)) = p;
+%% if ps.size == 32
+	/*
+	 * 32 BPP is expected to have aligned pixels
+	 */
+	*((uint32_t*)GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y)) = p;
+%% elif ps.size == 16
+	/*
+	 * 16 BPP is expected to have aligned pixels
+	 */
+	*((uint16_t*)GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y)) = p;
+%% elif ps.size == 8
+	/*
+	 * 8 BPP is byte aligned
+	 */
+	*((uint8_t*)GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y)) = p;
+%% elif ps.size == 1 or ps.size == 2 or ps.size == 4 or ps.size == 8
+	/*
+	 * The whole pixel is always stored within a single byte
+	 *
+	 * The full list = {1, 2, 4, 8}
+	 */
+	GP_SET_BITS1_ALIGNED(GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x), {{ ps.size }},
+	                     GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y), p);
+%% elif ps.size <= 10 or ps.size == 12 or ps.size == 16
+	/*
+	 * The pixel is stored in one or two bytes
+	 *
+	 * The max from subgroup (of mod 8 factor group) generated by
+	 * pixel_size mod 8 + pixel_size <= 16
+	 *
+	 * The full list = {3, 5, 6, 7, 9, 10, 12, 16}
+	 *
+	 * Hint: If the pixel size is coprime to 8 the group is generated by
+	 *       pixel_size mod 8 and maximal size thus is pixel_size + 7
+	 */
+	GP_SET_BITS2_ALIGNED(GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x), {{ ps.size }},
+	                     GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y), p);
+%% elif ps.size <= 18 or ps.size == 20 or ps.size == 24
+	/*
+	 * The pixel is stored in two or three bytes
+	 *
+	 * The max from subgroup (of mod 8 factor group) generated by
+	 * pixel_size mod 8 + pixel_size <= 24
+	 *
+	 * The full list = {11, 13, 14, 15, 17, 18, 20, 24}
+	 *
+	 * Hint: If the pixel size is coprime to 8 the group is generated by
+	 *       pixel_size mod 8 and maximal size thus is pixel_size + 7
+	 */
+	GP_SET_BITS3_ALIGNED(GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x), {{ ps.size }},
+	                     GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y), p);
+%% elif ps.size <= 23 or ps.size == 25 or ps.size == 26 or ps.size == 28 or ps.size == 32
+	/*
+	 * The pixel is stored in three or four bytes
+	 *
+	 * The max from subgroup (of mod 8 factor group) generated by
+	 * pixel_size mod 8 + pixel_size <= 32
+	 *
+	 * The full list = {19, 21, 22, 23, 25, 26, 28, 32}
+	 *
+	 * Hint: If the pixel size is coprime to 8 the group is generated by
+	 *       pixel_size mod 8 and maximal size thus is pixel_size + 7
+	 */
+	GP_SET_BITS4_ALIGNED(GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x), {{ ps.size }},
+	                     GP_PIXEL_ADDR_{{ ps.suffix }}(c, x, y), p);
 %% else
-	GP_SET_BITS(GP_PIXEL_ADDR_OFFSET_{{ ps.suffix }}(x), {{ ps.size }},
-		*(GP_PIXEL_ADDR_{{ ps.suffix}}(c, x, y)), p);
+	#error not implemented
 %% endif
 }
diff --git a/include/core/GP_GetSetBits.h b/include/core/GP_GetSetBits.h
new file mode 100644
index 0000000..bd55fe2
--- /dev/null
+++ b/include/core/GP_GetSetBits.h
@@ -0,0 +1,148 @@
+/*****************************************************************************
+ * This file is part of gfxprim library.                                     *
+ *                                                                           *
+ * Gfxprim is free software; you can redistribute it and/or                  *
+ * modify it under the terms of the GNU Lesser General Public                *
+ * License as published by the Free Software Foundation; either              *
+ * version 2.1 of the License, or (at your option) any later version.        *
+ *                                                                           *
+ * Gfxprim is distributed in the hope that it will be useful,                *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of            *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU         *
+ * Lesser General Public License for more details.                           *
+ *                                                                           *
+ * You should have received a copy of the GNU Lesser General Public          *
+ * License along with gfxprim; if not, write to the Free Software            *
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,                        *
+ * Boston, MA  02110-1301  USA                                               *
+ *                                                                           *
+ * Copyright (C) 2011      Tomas Gavenciak gavento@ucw.cz                  *
+ * Copyright (C) 2011      Cyril Hrubis metan@ucw.cz                       *
+ *                                                                           *
+ *****************************************************************************/
+
+ /*
+  
+   The macros generally take the bit offset, the bit length and the value to
+   operate on; the setter macros additionally take the destination.
+  
+  */
+
+#ifndef GP_GET_SET_BITS_H
+#define GP_GET_SET_BITS_H
+
+/*
+ * Helper macros to read/write parts of words 
+ *
+ * Return (shifted) count bits at offset of value
+ * Note: operates with value types same as val 
+ */
+#define GP_GET_BITS(offset, len, val) \
+	( ( (val)>>(offset) ) & ( ((((typeof(val))1)<<(len)) - 1) ) )
+
+/*
+ * Align-safe getbits
+ *
+ * TODO: Fix big endian
+ */
+#define GP_GET_BITS4_ALIGNED(offset, len, val) ({ \
+	uint32_t v;                               \
+	v  = ((uint8_t *)&val)[0];                \
+	v |= ((uint8_t *)&val)[1]<<8;             \
+	v |= ((uint8_t *)&val)[2]<<16;            \
+	v |= ((uint8_t *)&val)[3]<<24;            \
+                                                  \
+	GP_GET_BITS(offset, len, v);              \
+})
+
+#define GP_GET_BITS3_ALIGNED(offset, len, val) ({ \
+	uint32_t v;                               \
+	v  = ((uint8_t *)&val)[0];                \
+	v |= ((uint8_t *)&val)[1]<<8;             \
+	v |= ((uint8_t *)&val)[2]<<16;            \
+                                                  \
+	GP_GET_BITS(offset, len, v);              \
+})
+
+#define GP_GET_BITS2_ALIGNED(offset, len, val) ({ \
+	uint16_t v;                               \
+	v  = ((uint8_t *)&val)[0];                \
+	v |= ((uint8_t *)&val)[1]<<8;             \
+                                                  \
+	GP_GET_BITS(offset, len, v);              \
+})
+
+#define GP_GET_BITS1_ALIGNED(offset, len, val) ({ \
+	uint8_t v;                                \
+	v = ((uint8_t *)&val)[0];                 \
+                                                  \
+	GP_GET_BITS(offset, len, v);              \
+})
+
+/*
+ * Set count bits of dest at offset to val (shifted by offset)
+ * 
+ * Does not check val for overflow
+ * Operates on 8, 16, and 32 bit values, depending on the type of dest, 
+ * this should be unsigned
+ *
+ * GP_SET_BITS_OR only sets (|=) the bits, assuming these are clear beforehand
+ * GP_CLEAR_BITS sets the target bits to zero
+ * GP_SET_BITS does both
+ */
+#define GP_CLEAR_BITS(offset, len, dest) \
+       ( (dest) &= ~(((((typeof(dest))1) << (len)) - 1) << (offset)) )
+
+#define GP_SET_BITS_OR(offset, dest, val) ( (dest) |= ((val)<<(offset)) )
+
+#define GP_SET_BITS(offset, len, dest, val) do {  \
+               GP_CLEAR_BITS(offset, len, dest);  \
+               GP_SET_BITS_OR(offset, dest, val); \
+} while (0)
+
+/*
+ * Align-safe setbits
+ */
+#define GP_SET_BITS1_ALIGNED(offset, len, dest, val) do { \
+	uint8_t v = ((uint8_t *)dest)[0];                 \
+	GP_SET_BITS(offset, len, v, val);                 \
+	((uint8_t *)dest)[0] = v;                         \
+} while (0)
+
+#define GP_SET_BITS2_ALIGNED(offset, len, dest, val) do { \
+	uint16_t v;                                       \
+	v  = ((uint8_t *)dest)[0];                        \
+	v |= ((uint8_t *)dest)[1]<<8;                     \
+	                                                  \
+	GP_SET_BITS(offset, len, v, val);                 \
+	                                                  \
+	((uint8_t *)dest)[0] = 0xff & v;                  \
+	((uint8_t *)dest)[1] = 0xff & (v >> 8);           \
+} while (0)
+
+#define GP_SET_BITS3_ALIGNED(offset, len, dest, val) do { \
+	uint32_t v;                                       \
+	v  = ((uint8_t *)dest)[0];                        \
+	v |= ((uint8_t *)dest)[2]<<16;                    \
+	                                                  \
+	GP_SET_BITS(offset, len, v, val);                 \
+	                                                  \
+	((uint8_t *)dest)[0] = 0xff & v;                  \
+	((uint8_t *)dest)[1] = 0xff & (v >> 8);           \
+	((uint8_t *)dest)[2] = 0xff & (v >> 16);          \
+} while (0)
+
+#define GP_SET_BITS4_ALIGNED(offset, len, dest, val) do { \
+	uint32_t v;                                       \
+	v  = ((uint8_t *)dest)[0];                        \
+	v |= ((uint8_t *)dest)[3]<<24;                    \
+	                                                  \
+	GP_SET_BITS(offset, len, v, val);                 \
+	                                                  \
+	((uint8_t *)dest)[0] = 0xff & v;                  \
+	((uint8_t *)dest)[1] = 0xff & (v >> 8);           \
+	((uint8_t *)dest)[2] = 0xff & (v >> 16);          \
+	((uint8_t *)dest)[3] = 0xff & (v >> 24);          \
+} while (0)
+
+
+#endif /* GP_GET_SET_BITS_H */
diff --git a/libs/core/GP_Pixel.gen.c.t b/libs/core/GP_Pixel.gen.c.t
index e0cf7c0..f41625b 100644
--- a/libs/core/GP_Pixel.gen.c.t
+++ b/libs/core/GP_Pixel.gen.c.t
@@ -8,6 +8,7 @@ Do not include directly, use GP_Pixel.h
 %% block body
 #include <stdio.h>
 #include "GP_Pixel.h"
+#include "GP_GetSetBits.h"
/* 
  * Description of all known pixel types 
@@ -29,6 +30,8 @@ const GP_PixelTypeDescription const GP_PixelTypes [] = {
 %% endfor
 };
+#warning FIXME: do generic get set bit for pixel printing
+
 %% for pt in pixeltypes
 %% if not pt.is_unknown()
 /*
diff --git a/tests/SDL/shapetest.c b/tests/SDL/shapetest.c
index 57cca2d..3c61629 100644
--- a/tests/SDL/shapetest.c
+++ b/tests/SDL/shapetest.c
@@ -485,6 +485,8 @@ int main(int argc, char ** argv)
    	}
    }
+	GP_SetDebugLevel(10);
+
    /* Initialize SDL */
    if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER) != 0) {
    	fprintf(stderr, "Could not initialize SDL: %s\n", SDL_GetError());
http://repo.or.cz/w/gfxprim.git/commit/4d19114d20f531cd49c79158af4263ea4af98...
commit 4d19114d20f531cd49c79158af4263ea4af98109
Author: Cyril Hrubis metan@ucw.cz
Date:   Thu Oct 6 18:15:08 2011 +0200
Remove the compression from the 'make tar' target.
The compression of .git repository data is too slow
    and takes several minutes, which is bad, when you are
    about to leave the train, but the data needs to be
    backed up.
diff --git a/Makefile b/Makefile
index 5c4d45f..acc8543 100644
--- a/Makefile
+++ b/Makefile
@@ -35,4 +35,4 @@ endif
tar:
    $(MAKE) clean
-	cd .. && tar cjf gfxprim-`date +%Y-%b-%d-%HH%MM`.tar.bz2 gfxprim
+	cd .. && tar cf gfxprim-`date +%Y-%b-%d-%HH%MM`.tar gfxprim
-----------------------------------------------------------------------
Summary of changes:
 Makefile                            |    2 +-
 include/core/GP_Common.h            |   45 ---------
 include/core/GP_Convert.gen.h.t     |    2 +-
 include/core/GP_GetPutPixel.gen.h.t |  186 ++++++++++++++++++++++++++++++++--
 include/core/GP_GetSetBits.h        |  148 ++++++++++++++++++++++++++++
 libs/core/GP_Pixel.gen.c.t          |    3 +
 tests/SDL/shapetest.c               |    2 +
 7 files changed, 330 insertions(+), 58 deletions(-)
 create mode 100644 include/core/GP_GetSetBits.h
repo.or.cz automatic notification. Contact project admin jiri.bluebear.dluhos@gmail.com
if you want to unsubscribe, or site admin admin@repo.or.cz if you receive
no reply.
-- 
gfxprim.git ("A simple 2D graphics library with emphasis on correctness and well-defined operation.")