Skip byte order marks (bug #1423362).

2006-02-21 20:28:27 +00:00
parent ecc793bf83
commit 56a0ec9638
2 changed files with 14 additions and 0 deletions
--- a/1
+++ b/1
@@ -16,6 +16,7 @@ $Source$
    * libraries/iconv_wrapper.lib.php, libraries/charset_conversion.lib.php,
      libraries/database_interface.lib.php: Compatibility with iconv charset
      names on AIX (patch #1420704, thanks to Björn Wiberg - bwiberg).
+    * libraries/import.lib.php: Skip byte order marks (bug #1423362).

 2006-02-21 Sebastian Mendel <cybot_tm@users.sourceforge.net>
    * libraries/common.lib.php PMA_getUvaCondition():
--- a/libraries/import.lib.php
+++ b/libraries/import.lib.php
@@ -251,6 +251,19 @@ function PMA_importGetNextChunk($size = 32768)
    if ($charset_conversion) {
        return PMA_convert_string($charset_of_file, $charset, $result);
    } else {
+        // Skip a possible byte order mark. Only UTF-8 and UTF-16 (BE and LE)
+        // are handled; other charsets are probably not needed, but can be
+        // added. Reference: <http://en.wikipedia.org/wiki/Byte_Order_Mark>
+        // @TODO: BOM could be used for charset autodetection
+        if ($offset == $size) {
+            // UTF-8
+            if (strncmp($result, "\xEF\xBB\xBF", 3) == 0) {
+                $result = substr($result, 3);
+            // UTF-16 BE, LE
+            } elseif (strncmp($result, "\xFE\xFF", 2) == 0 || strncmp($result, "\xFF\xFE", 2) == 0) {
+                $result = substr($result, 2);
+            }
+        }
        return $result; 
    }
 }