diff src/video/x11/SDL_x11events.c @ 3978:b966761fef6c SDL-1.2

Significantly improved XIM support. Fixes Bugzilla #429. Selected notes from the patch's README: = FIXES = This patch fixes the above issues as follows. == X11 events == Moved XFilterEvent just after XNextEvent so that all events are passed to it. Also, XFilterEvent will receive masks indicated by IM through XNFilterEvents IC value as well as masks supplied by SDL. X11_KeyRepeat is called between XNextEvent and XFilterEvent, after testing that an event is a KeyRelease. I'm not 100% comfortable doing so, but I couldn't find a better timing to call it, and use of the function is inevitable. == Xutf8LookupString == Used a longer buffer to receive UTF-8 string. If it is insufficient, a dynamic storage of the requested size will be allocated. The initial size of the buffer is set to 32, because the Japanese text converted from the most widely used benchmark key sequence for Japanese IM, "WATASHINONAMAEHANAKANODESU." has ten Japanese characters in it, that occupies 30 bytes when encoded in UTF-8. == SDL_keysym.unicode == On Windows version of SDL implementation, SDL_keysym.unicode stores UTF-16 encoded unicode characters, one UTF-16 encoding unit per SDL event. Unicode supplementary characters are sent to an application as two events. (One with a high surrogate and another with a low surrogate.) The behavior seems reasonable since it is upward compatible with existing handling of BMP characters. I wrote a UTF-8 to UTF-16 conversion function for the purpose. It is designed with the execution speed in mind, having a minimum set of features that my patch requires.
author Ryan C. Gordon <icculus@icculus.org>
date Mon, 25 Jun 2007 19:58:32 +0000
parents c5c3c772f5aa
children f61a20d195f7
line wrap: on
line diff
--- a/src/video/x11/SDL_x11events.c	Mon Jun 25 14:58:22 2007 +0000
+++ b/src/video/x11/SDL_x11events.c	Mon Jun 25 19:58:32 2007 +0000
@@ -176,6 +176,124 @@
 	}
 	return c;
 }
+
+/* Given a UTF-8 encoded string pointed to by utf8 of length length in
+   bytes, returns the corresponding UTF-16 encoded string in the
+   buffer pointed to by utf16.  The maximum number of UTF-16 encoding
+   units (i.e., Unit16s) allowed in the buffer is specified in
+   utf16_max_length.  The return value is the number of UTF-16
+   encoding units placed in the output buffer pointed to by utf16.
+
+   In case of an error, -1 is returned, leaving some unusable partial
+   results in the output buffer.
+
+   The caller must estimate the size of the utf16 buffer itself before
+   calling this function.  An insufficient output buffer is considered
+   an error, and once an error has occurred, this function gives no
+   clue how large the result would be.
+
+   The error cases include following:
+
+   - Invalid byte sequences were in the input UTF-8 bytes.  The caller
+     has no way to know at what point in the input buffer the
+     erroneous byte was.
+
+   - The input contained a character (a valid UTF-8 byte sequence)
+     whose scalar value exceeded the range that UTF-16 can represent
+     (i.e., characters whose Unicode scalar value is above 0x10FFFF).
+
+   - The output buffer does not have enough space to hold the entire utf16 data.
+
+   Please note:
+
+   - '\0'-termination is not assumed both on the input UTF-8 string
+     and on the output UTF-16 string; any legal zero byte in the input
+     UTF-8 string will be converted to a 16-bit zero in output.  As a
+     side effect, the last UTF-16 encoding unit stored in the output
+     buffer will have a non-zero value if the input UTF-8 was not
+     '\0'-terminated.
+
+   - UTF-8 aliases are *not* considered as an error.  They are
+     converted to UTF-16.  For example, 0xC0 0xA0, 0xE0 0x80 0xA0, 
+     and 0xF0 0x80 0x80 0xA0 are all mapped to a single UTF-16
+     encoding unit 0x0020.
+
+   - Three byte UTF-8 sequences whose value corresponds to a surrogate
+     code or other reserved scalar value are not considered as an
+     error either.  They may cause an invalid UTF-16 data (e.g., those
+     containing unpaired surrogates).
+
+*/
+
+static int Utf8ToUtf16(const Uint8 *utf8, const int utf8_length, Uint16 *utf16, const int utf16_max_length) {
+
+    /* p moves over the output buffer.  max_ptr points one past the last slot of the buffer.  */
+    Uint16 *p = utf16;
+    Uint16 const *const max_ptr = utf16 + utf16_max_length;
+
+    /* end_of_input points to the last input byte itself, not one past it.  */
+    Uint8 const *const end_of_input = utf8 + utf8_length - 1;
+
+    while (utf8 <= end_of_input) {
+	Uint8 const c = *utf8;  /* Declared before any statement so the block stays valid C89.  */
+	if (p >= max_ptr) {
+	    /* No more output space.  */
+	    return -1;
+	}
+	if (c < 0x80) {
+	    /* One byte ASCII.  */
+	    *p++ = c;
+	    utf8 += 1;
+	} else if (c < 0xC0) {
+	    /* Follower byte without a preceding leader byte.  */
+	    return -1;
+	} else if (c < 0xE0) {
+	    /* Two byte sequence.  We need one follower byte.  0xCF80 is -0x3080 mod 2^16: it cancels the 0xC0/0x80 marker bits.  */
+	    if (end_of_input - utf8 < 1 || (((utf8[1] ^ 0x80)) & 0xC0)) {
+		return -1;
+	    }
+	    *p++ = (Uint16)(0xCF80 + (c << 6) + utf8[1]);
+	    utf8 += 2;
+	} else if (c < 0xF0) {
+	    /* Three byte sequence.  We need two follower bytes.  0xDF80 is -0x2080 mod 2^16 (the leader's marker bits shift out).  */
+	    if (end_of_input - utf8 < 2 || (((utf8[1] ^ 0x80) | (utf8[2] ^ 0x80)) & 0xC0)) {
+		return -1;
+	    }
+	    *p++ = (Uint16)(0xDF80 + (c << 12) + (utf8[1] << 6) + utf8[2]);
+	    utf8 += 3;
+	} else if (c < 0xF8) {
+	    int plane;
+	    /* Four byte sequence.  We need three follower bytes.  */
+	    if (end_of_input - utf8 < 3 || (((utf8[1] ^ 0x80) | (utf8[2] ^0x80) | (utf8[3] ^ 0x80)) & 0xC0)) {
+		return -1;
+	    }
+	    plane = (-0x3C8 + (c << 2) + (utf8[1] >> 4));  /* Scalar bits 16-20.  The sum is an int, so the bias is (0xF0 << 2) + (0x80 >> 4), not its low byte 0xC8.  */
+	    if (plane == 0) {
+		/* This four byte sequence is an alias that
+                   corresponds to a Unicode scalar value in BMP.
+		   It fits in an UTF-16 encoding unit.  */
+		*p++ = (Uint16)(0xDF80 + (utf8[1] << 12) + (utf8[2] << 6) + utf8[3]);
+	    } else if (plane <= 16) {
+		/* This is a legal four byte sequence that corresponds to a surrogate pair.  */
+		if (p + 1 >= max_ptr) {
+		    /* Not enough space in the output buffer for the pair.  */
+		    return -1;
+		}
+		*p++ = (Uint16)(0xE5B8 + (c << 8) + (utf8[1] << 2) + (utf8[2] >> 4));
+		*p++ = (Uint16)(0xDB80 + ((utf8[2] & 0x0F) << 6) + utf8[3]);
+	    } else {
+		/* This four byte sequence is out of UTF-16 code space.  */
+		return -1;
+	    }
+	    utf8 += 4;
+	} else {
+	    /* Longer sequence or unused byte.  */
+	    return -1;
+	}
+    }
+    return p - utf16;
+}
+
 #endif
 
 /* Check to see if this is a repeated key.
@@ -275,6 +393,24 @@
 	SDL_memset(&xevent, '\0', sizeof (XEvent));  /* valgrind fix. --ryan. */
 	XNextEvent(SDL_Display, &xevent);
 
+	/* Discard KeyRelease and KeyPress events generated by auto-repeat.
+	   We need to do it before passing event to XFilterEvent.  Otherwise,
+	   KeyRelease aware IMs are confused...  */
+	if ( xevent.type == KeyRelease
+	     && X11_KeyRepeat(SDL_Display, &xevent) ) {
+		return 0;
+	}
+
+#ifdef X_HAVE_UTF8_STRING
+	/* If we are translating with IM, we need to pass all events
+	   to XFilterEvent, and discard those filtered events immediately.  */
+	if ( SDL_TranslateUNICODE
+	     && SDL_IM != NULL
+	     && XFilterEvent(&xevent, None) ) {
+		return 0;
+	}
+#endif
+
 	posted = 0;
 	switch (xevent.type) {
 
@@ -358,6 +494,13 @@
 	    }
 	    break;
 
+	    /* Some IM requires MappingNotify to be passed to
+	       XRefreshKeyboardMapping by the app.  */
+	    case MappingNotify: {
+		XRefreshKeyboardMapping(&xevent.xmapping);
+	    }
+	    break;
+
 	    /* Generated upon EnterWindow and FocusIn */
 	    case KeymapNotify: {
 #ifdef DEBUG_XEVENTS
@@ -409,50 +552,168 @@
 
 	    /* Key press? */
 	    case KeyPress: {
-		static SDL_keysym saved_keysym;
 		SDL_keysym keysym;
 		KeyCode keycode = xevent.xkey.keycode;
 
 #ifdef DEBUG_XEVENTS
 printf("KeyPress (X11 keycode = 0x%X)\n", xevent.xkey.keycode);
 #endif
-		/* Get the translated SDL virtual keysym */
-		if ( keycode ) {
+		/* If we're not doing translation, we're done! */
+		if ( !SDL_TranslateUNICODE ) {
+			/* Get the translated SDL virtual keysym and put it on the queue.*/
 			keysym.scancode = keycode;
 			keysym.sym = X11_TranslateKeycode(SDL_Display, keycode);
 			keysym.mod = KMOD_NONE;
 			keysym.unicode = 0;
-		} else {
-			keysym = saved_keysym;
-		}
-
-		/* If we're not doing translation, we're done! */
-		if ( !SDL_TranslateUNICODE ) {
 			posted = SDL_PrivateKeyboard(SDL_PRESSED, &keysym);
 			break;
 		}
 
-		if ( XFilterEvent(&xevent, None) ) {
-			if ( xevent.xkey.keycode ) {
-				posted = SDL_PrivateKeyboard(SDL_PRESSED, &keysym);
-			} else {
-				/* Save event to be associated with IM text
-				   In 1.3 we'll have a text event instead.. */
-				saved_keysym = keysym;
-			}
-			break;
-		}
-
 		/* Look up the translated value for the key event */
 #ifdef X_HAVE_UTF8_STRING
 		if ( SDL_IC != NULL ) {
-			static Status state;
+			Status status;
+			KeySym xkeysym;
+			int i;
 			/* A UTF-8 character can be at most 6 bytes */
-			char keybuf[6];
-			if ( Xutf8LookupString(SDL_IC, &xevent.xkey,
-			                        keybuf, sizeof(keybuf),
-			                        NULL, &state) ) {
-				keysym.unicode = Utf8ToUcs4((Uint8*)keybuf);
+			/* ... It's true, but Xutf8LookupString can
+			   return more than one character.  Moreover,
+			   the spec. puts no upper bound, so we should
+			   be ready for longer strings.  */
+			char keybuf[32];
+			char *keydata = keybuf;
+			int count;
+			Uint16 utf16buf[32];
+			Uint16 *utf16data = utf16buf;
+			int utf16size;
+			int utf16length;
+
+			count = Xutf8LookupString(SDL_IC, &xevent.xkey, keydata, sizeof(keybuf), &xkeysym, &status);
+			if (XBufferOverflow == status) {
+			  /* The IM has just generated somewhat long
+			     string.  We need a longer buffer in this
+			     case.  */
+			  keydata = SDL_malloc(count);
+			  if ( keydata == NULL ) {
+			    SDL_OutOfMemory();
+			    break;
+			  }
+			  count = Xutf8LookupString(SDL_IC, &xevent.xkey, keydata, count, &xkeysym, &status);
+			}
+
+			switch (status) {
+
+			case XBufferOverflow: {
+			  /* Oops!  We have allocated the bytes as
+			     requested by Xutf8LookupString, so the
+			     length of the buffer must be
+			     sufficient.  This case should never
+			     happen! */
+			  SDL_SetError("Xutf8LookupString indicated a double buffer overflow!");
+			  break;
+			}
+
+			case XLookupChars:
+			case XLookupBoth: {
+			  if (0 == count) {
+			    break;
+			  }
+
+			  /* We got a converted string from IM.  Make
+			     sure to deliver all characters to the
+			     application as SDL events.  Note that
+			     an SDL event can only carry one UTF-16
+			     encoding unit, and a surrogate pair is
+			     delivered as two SDL events.  I guess
+			     this behaviour is probably _imported_
+			     from Windows or MacOS.  To do so, we need
+			     to convert the UTF-8 data into UTF-16
+			     data (not UCS4/UTF-32!).  We need an
+			     estimate of the number of UTF-16 encoding
+			     units here.  The worst case is pure ASCII
+			     string.  Assume so. */
+			  /* In 1.3 SDL may have a text event instead, that
+			     carries the whole UTF-8 string with it. */
+			  utf16size = count * sizeof(Uint16);
+			  if (utf16size > sizeof(utf16buf)) {
+			    utf16data = (Uint16 *) SDL_malloc(utf16size);
+			    if (utf16data == NULL) {
+			      SDL_OutOfMemory();
+			      break;
+			    }
+			  }
+			  utf16length = Utf8ToUtf16((Uint8 *)keydata, count, utf16data, utf16size);
+			  if (utf16length < 0) {
+			    /* The keydata contained an invalid byte
+			       sequence.  It should be a bug of the IM
+			       or Xlib... */
+			    SDL_SetError("Oops! Xutf8LookupString returned an invalid UTF-8 sequence!");
+			    break;
+			  }
+
+			  /* Deliver all UTF-16 encoding units.  At
+			     this moment, SDL event queue has a
+			     fixed size (128 events), and an SDL
+			     event can hold just one UTF-16 encoding
+			     unit.  So, if we receive more than 128
+			     UTF-16 encoding units from a commit,
+			     exceeded characters will be lost.  */
+			  for (i = 0; i < utf16length - 1; i++) {
+			    keysym.scancode = 0;
+			    keysym.sym = SDLK_UNKNOWN;
+			    keysym.mod = KMOD_NONE;
+			    keysym.unicode = utf16data[i];
+			    posted = SDL_PrivateKeyboard(SDL_PRESSED, &keysym);
+			  }
+			  /* The keysym for the last character carries the
+			     scancode and symbol that corresponds to the X11
+			     keycode.  */
+			  if (utf16length > 0) {			       
+			    keysym.scancode = keycode;
+			    keysym.sym = (keycode ? X11_TranslateKeycode(SDL_Display, keycode) : 0);
+			    keysym.mod = KMOD_NONE;
+			    keysym.unicode = utf16data[utf16length - 1];
+			    posted = SDL_PrivateKeyboard(SDL_PRESSED, &keysym);
+			  }
+			  break;
+			}
+
+			case XLookupKeySym: {
+			  /* I'm not sure whether it is possible that
+			     a zero keycode makes XLookupKeySym
+			     status.  What I'm sure is that a
+			     combination of a zero scan code and a non
+			     zero sym makes SDL_PrivateKeyboard
+			     strange state...  So, just discard it.
+			     If this doesn't work, I'm receiving bug
+			     reports, and I can know under what
+			     condition this case happens.  */
+			  if (keycode) {
+			    keysym.scancode = keycode;
+			    keysym.sym = X11_TranslateKeycode(SDL_Display, keycode);
+			    keysym.mod = KMOD_NONE;
+			    keysym.unicode = 0;
+			    posted = SDL_PrivateKeyboard(SDL_PRESSED, &keysym);
+			  }
+			  break;
+			}
+
+			case XLookupNone: {
+			  /* IM has eaten the event.  */
+			  break;
+			}
+
+			default:
+			  /* An unknown status from Xutf8LookupString.  */
+			  SDL_SetError("Oops! Xutf8LookupStringreturned an unknown status");
+			}
+
+			/* Release dynamic buffers if allocated.  */
+			if (keydata != NULL && keybuf != keydata) {
+			  SDL_free(keydata);
+			}
+			if (utf16data != NULL && utf16buf != utf16data) {
+			  SDL_free(utf16data);
 			}
 		}
 		else
@@ -472,8 +733,9 @@
 				*/
 				keysym.unicode = (Uint8)keybuf[0];
 			}
+
+			posted = SDL_PrivateKeyboard(SDL_PRESSED, &keysym);
 		}
-		posted = SDL_PrivateKeyboard(SDL_PRESSED, &keysym);
 	    }
 	    break;
 
@@ -482,13 +744,17 @@
 		SDL_keysym keysym;
 		KeyCode keycode = xevent.xkey.keycode;
 
+		if (keycode == 0) {
+		  /* There should be no KeyRelease for keycode == 0,
+		     since it is a notification from IM, not a real
+		     keystroke.  */
+		  /* We need to emit some diagnostic message here.  */
+		  break;
+		}
+
 #ifdef DEBUG_XEVENTS
 printf("KeyRelease (X11 keycode = 0x%X)\n", xevent.xkey.keycode);
 #endif
-		/* Check to see if this is a repeated key */
-		if ( X11_KeyRepeat(SDL_Display, &xevent) ) {
-			break;
-		}
 
 		/* Get the translated SDL virtual keysym */
 		keysym.scancode = keycode;