From 0b5023bc6269fbca6507f246f28ad37d7d1d040e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilla=20L=C3=B6wy?= <elmindreda@glfw.org>
Date: Tue, 8 Aug 2017 16:25:13 +0200
Subject: [PATCH] X11: Fix Latin-1 text not being converted to UTF-8

---
 README.md        |   1 +
 src/x11_window.c | 155 +++++++++++++++++++++++++++++------------------
 2 files changed, 96 insertions(+), 60 deletions(-)

diff --git a/README.md b/README.md
index 4ca7a0be..40e9544f 100644
--- a/README.md
+++ b/README.md
@@ -207,6 +207,7 @@ information on what to include when reporting a bug.
 - [X11] Bugfix: Key name string encoding depended on current locale (#981,#983)
 - [X11] Bugfix: Incremental reading of selections was not supported (#275)
 - [X11] Bugfix: Selection I/O reported but did not support `COMPOUND_TEXT`
+- [X11] Bugfix: Latin-1 text read from selections was not converted to UTF-8
 - [Linux] Moved to evdev for joystick input (#906,#1005)
 - [Linux] Bugfix: Event processing did not detect joystick disconnection (#932)
 - [Linux] Bugfix: The joystick device path could be truncated (#1025)
diff --git a/src/x11_window.c b/src/x11_window.c
index a0dd81be..ea49e55d 100644
--- a/src/x11_window.c
+++ b/src/x11_window.c
@@ -460,6 +460,81 @@ static char** parseUriList(char* text, int* count)
     return paths;
 }
 
+// Encode the code point ch as UTF-8 into s and return the bytes written (0-4)
+// Writes no NUL terminator; based on cutef8 by Jeff Bezanson (Public Domain)
+//
+static size_t encodeUTF8(char* s, unsigned int ch)
+{
+    size_t count = 0;
+
+    if (ch < 0x80)  // one byte: 0xxxxxxx
+        s[count++] = (char) ch;
+    else if (ch < 0x800)  // two bytes: 110xxxxx 10xxxxxx
+    {
+        s[count++] = (ch >> 6) | 0xc0;
+        s[count++] = (ch & 0x3f) | 0x80;
+    }
+    else if (ch < 0x10000)  // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+    {
+        s[count++] = (ch >> 12) | 0xe0;
+        s[count++] = ((ch >> 6) & 0x3f) | 0x80;
+        s[count++] = (ch & 0x3f) | 0x80;
+    }
+    else if (ch < 0x110000)  // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    {
+        s[count++] = (ch >> 18) | 0xf0;
+        s[count++] = ((ch >> 12) & 0x3f) | 0x80;
+        s[count++] = ((ch >> 6) & 0x3f) | 0x80;
+        s[count++] = (ch & 0x3f) | 0x80;
+    }
+    // ch >= 0x110000 matches no branch: nothing is written and 0 is returned
+    return count;
+}
+
+// Decode a Unicode code point from the UTF-8 stream at *s and advance *s past it
+// Based on cutef8 by Jeff Bezanson (Public Domain)
+//
+#if defined(X_HAVE_UTF8_STRING)
+static unsigned int decodeUTF8(const char** s)
+{
+    unsigned int ch = 0, count = 0;
+    static const unsigned int offsets[] =
+    {
+        0x00000000u, 0x00003080u, 0x000e2080u,
+        0x03c82080u, 0xfa082080u, 0x82082080u
+    };
+    // Consume one byte, then every following byte of the form 10xxxxxx
+    do
+    {
+        ch = (ch << 6) + (unsigned char) **s;
+        (*s)++;
+        count++;
+    } while ((**s & 0xc0) == 0x80);
+    // offsets[count - 1] cancels the marker bits summed in from count raw bytes
+    assert(count <= 6);
+    return ch - offsets[count - 1];
+}
+#endif /*X_HAVE_UTF8_STRING*/
+
+// Convert the specified Latin-1 string to UTF-8; the caller frees the result
+// Returns NULL if source is NULL or the allocation fails
+static char* convertLatin1toUTF8(const char* source)
+{
+    size_t size = 1;
+    const char* sp;
+
+    for (sp = source;  sp && *sp;  sp++)
+        size += (*sp & 0x80) ? 2 : 1;  // 0x80-0xff take two UTF-8 bytes
+
+    char* target = source ? calloc(size, 1) : NULL;
+    char* tp = target;
+    // Cast via unsigned char so bytes >= 0x80 are not sign-extended and dropped
+    for (sp = source;  target && *sp;  sp++)
+        tp += encodeUTF8(tp, (unsigned char) *sp);
+
+    return target;
+}
+
 // Centers the cursor over the window client area
 //
 static void centerCursor(_GLFWwindow* window)
@@ -915,6 +990,7 @@ static const char* getSelectionString(Atom selection)
         if (actualType == _glfw.x11.INCR)
         {
             size_t size = 1;
+            char* string = NULL;
 
             for (;;)
             {
@@ -943,17 +1019,32 @@ static const char* getSelectionString(Atom selection)
                 if (itemCount)
                 {
                     size += itemCount;
-                    *selectionString = realloc(*selectionString, size);
-                    (*selectionString)[size - itemCount - 1] = '\0';
-                    strcat(*selectionString, data);
+                    string = realloc(string, size);
+                    string[size - itemCount - 1] = '\0';
+                    strcat(string, data);
                 }
 
                 if (!itemCount)
+                {
+                    if (targets[i] == XA_STRING)
+                    {
+                        *selectionString = convertLatin1toUTF8(string);
+                        free(string);
+                    }
+                    else
+                        *selectionString = string;
+
                     break;
+                }
             }
         }
         else if (actualType == targets[i])
-            *selectionString = strdup(data);
+        {
+            if (targets[i] == XA_STRING)
+                *selectionString = convertLatin1toUTF8(data);
+            else
+                *selectionString = strdup(data);
+        }
 
         XFree(data);
 
@@ -1035,62 +1126,6 @@ static void releaseMonitor(_GLFWwindow* window)
     }
 }
 
-// Encode a Unicode code point to a UTF-8 stream
-// Based on cutef8 by Jeff Bezanson (Public Domain)
-//
-static size_t encodeUTF8(char* s, unsigned int ch)
-{
-    size_t count = 0;
-
-    if (ch < 0x80)
-        s[count++] = (char) ch;
-    else if (ch < 0x800)
-    {
-        s[count++] = (ch >> 6) | 0xc0;
-        s[count++] = (ch & 0x3f) | 0x80;
-    }
-    else if (ch < 0x10000)
-    {
-        s[count++] = (ch >> 12) | 0xe0;
-        s[count++] = ((ch >> 6) & 0x3f) | 0x80;
-        s[count++] = (ch & 0x3f) | 0x80;
-    }
-    else if (ch < 0x110000)
-    {
-        s[count++] = (ch >> 18) | 0xf0;
-        s[count++] = ((ch >> 12) & 0x3f) | 0x80;
-        s[count++] = ((ch >> 6) & 0x3f) | 0x80;
-        s[count++] = (ch & 0x3f) | 0x80;
-    }
-
-    return count;
-}
-
-// Decode a Unicode code point from a UTF-8 stream
-// Based on cutef8 by Jeff Bezanson (Public Domain)
-//
-#if defined(X_HAVE_UTF8_STRING)
-static unsigned int decodeUTF8(const char** s)
-{
-    unsigned int ch = 0, count = 0;
-    static const unsigned int offsets[] =
-    {
-        0x00000000u, 0x00003080u, 0x000e2080u,
-        0x03c82080u, 0xfa082080u, 0x82082080u
-    };
-
-    do
-    {
-        ch = (ch << 6) + (unsigned char) **s;
-        (*s)++;
-        count++;
-    } while ((**s & 0xc0) == 0x80);
-
-    assert(count <= 6);
-    return ch - offsets[count - 1];
-}
-#endif /*X_HAVE_UTF8_STRING*/
-
 // Process the specified X event
 //
 static void processEvent(XEvent *event)