From 383f6b6949b729f3285180260f6a677390ec4b9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Wed, 14 Aug 2002 16:18:51 +0000 Subject: [PATCH] Improvements to importing/exporting when utf-8 translation is being used: * Improved detection of used recoding function, now can be configured which function should be preferred ($cfg['RecodingEngine']). * read_dump.php3 uses new function PMA_convert_string instead of hardcoded iconv. * Support for converting charset of loaded files, used new function PMA_convert_file. * Support for exporting in custom charset. --- ChangeLog | 13 ++ Documentation.html | 13 ++ config.inc.php3 | 8 + db_details_export.php3 | 21 +++ ldi_check.php3 | 7 + ldi_table.php3 | 25 +++ libraries/charset_conversion.lib.php3 | 241 ++++++++++++++++++++++---- read_dump.php3 | 2 +- tbl_dump.php3 | 29 +++- tbl_properties_export.php3 | 21 +++ 10 files changed, 341 insertions(+), 39 deletions(-) diff --git a/ChangeLog b/ChangeLog index db67b6742..421e7d87f 100755 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,19 @@ phpMyAdmin - Changelog $Id$ $Source$ +2002-08-14 Michal Cihar + * libraries/charset_conversion.lib.php3, config.inc.php3, Documentation.*: + Improved detection of used recoding function, now can be configured + which function should be preferred ($cfg['RecodingEngine']). + * libraries/charset_conversion.lib.php3, read_dump.php3: + read_dump.php3 uses new function PMA_convert_string instead of hardcoded + iconv. + * libraries/charset_conversion.lib.php3, ldi_check.php3, ldi_table.php3: + Support for converting charset of loaded files, used new function + PMA_convert_file. + * tbl_dump.php3, db_details_export.php3, tbl_properties_export.php3: + Support for exporting in custom charset.
+ 2002-08-14 Marc Delisle * lang/japanese update: thanks to Yukihiro Kawada diff --git a/Documentation.html b/Documentation.html index b5a545d34..e2f6999da 100755 --- a/Documentation.html +++ b/Documentation.html @@ -1181,6 +1181,19 @@ $cfg['PmaAbsoluteUri'] = (!empty($_SERVER['HTTPS']) ? 'https' : 'http') . '://'

+
$cfg['RecodingEngine'] string
+
+ You can select here which functions will be used for charset conversion. + Possible values are:
+
    +
  • auto - automatically use whichever engine is available (iconv is tried + first, then recode)
  • +
  • iconv - use iconv or libiconv functions
  • +
  • recode - use recode_string function
  • +
+ Default is auto. +
+
$cfg['AvailableCharsets'] array
Available charsets for MySQL conversion. You can add your own (any of diff --git a/config.inc.php3 b/config.inc.php3 index b076b8647..0d7d231b0 100755 --- a/config.inc.php3 +++ b/config.inc.php3 @@ -253,6 +253,14 @@ $cfg['DefaultCharset'] = 'iso-8859-1'; // extension and where dl() is not supported $cfg['AllowAnywhereRecoding'] = FALSE; +// You can select here which functions will be used for charset conversion. +// Possible values are: +// auto - automatically use available one (first is tested iconv, then +// recode) +// iconv - use iconv or libiconv functions +// recode - use recode_string function +$cfg['RecodingEngine'] = 'auto'; + // Available charsets for MySQL conversion. currently contains all which could // be found in lang/* files and few more. // Charsets will be shown in same order as here listed, so if you frequently diff --git a/db_details_export.php3 b/db_details_export.php3 index 5b0b0f8e2..8d85459f4 100644 --- a/db_details_export.php3 +++ b/db_details_export.php3 @@ -108,6 +108,27 @@ echo "\n"; > + ' . "\n" + . ' ' . "\n"; + while ($temp_charset = next($cfg['AvailableCharsets'])) { + echo ' ' . "\n"; + } // end while + echo ' '; + } // end if + echo "\n"; + ?> = 40004) { diff --git a/ldi_check.php3 b/ldi_check.php3 index b74c3d057..2b4161a63 100755 --- a/ldi_check.php3 +++ b/ldi_check.php3 @@ -36,6 +36,13 @@ if (isset($btnLDI) && ($textfile != 'none')) { if (function_exists('PMA_kanji_file_conv')) { $textfile = PMA_kanji_file_conv($textfile, $knjenc, isset($xkana) ? 
$xkana : ''); } + + // Convert the file's charset if necessary + if ($cfg['AllowAnywhereRecoding'] && $allow_recoding + && isset($charset_of_file) && $charset_of_file != $charset) { + $textfile = PMA_convert_file($charset_of_file, $convcharset, $textfile); + } + // Formats the data posted to this script $textfile = PMA_sqlAddslashes($textfile); if (get_magic_quotes_gpc()) { diff --git a/ldi_table.php3 b/ldi_table.php3 index d6bc15ef7..417255b13 100755 --- a/ldi_table.php3 +++ b/ldi_table.php3 @@ -26,6 +26,31 @@ require('./tbl_properties_table_info.php3'); + ' . "\n" + . ' '.$strCharsetOfFile . "\n" + . ' '. "\n" + . ' ' . "\n"; + echo ' ' . "\n"; + echo ' ' . "\n"; + } // end if + echo "\n"; + ?> diff --git a/libraries/charset_conversion.lib.php3 b/libraries/charset_conversion.lib.php3 index e1734c8d4..34f3a0a41 100644 --- a/libraries/charset_conversion.lib.php3 +++ b/libraries/charset_conversion.lib.php3 @@ -24,23 +24,118 @@ if (!defined('PMA_CHARSET_CONVERSION_LIB_INCLUDED')){ && ((PMA_PHP_INT_VERSION >= 40000 && !@ini_get('safe_mode') && @ini_get('enable_dl')) || (PMA_PHP_INT_VERSION < 40000 && PMA_PHP_INT_VERSION > 30009 && !@get_cfg_var('safe_mode'))) && @function_exists('dl')) { - if (!(@extension_loaded('recode') || @extension_loaded('iconv'))) { - if (PMA_IS_WINDOWS) { - $suffix = '.dll'; - } else { - $suffix = '.so'; + + if (PMA_IS_WINDOWS) { + $suffix = '.dll'; + } else { + $suffix = '.so'; + } + + // Initialize configuration for default, if not set: + if (!isset($cfg['RecodingEngine'])) $cfg['RecodingEngine'] = 'auto'; + + if ($cfg['RecodingEngine'] == 'recode') { + if (! @extension_loaded('recode')) { + dl('recode' . $suffix); + if (!@extension_loaded('recode')) { + echo $strCantLoadRecodeIconv; + exit(); + } } - dl('recode' . $suffix); - if (!@extension_loaded('recode')) { + $PMA_recoding_engine = 'recode'; + } elseif ($cfg['RecodingEngine'] == 'iconv') { + if (! @extension_loaded('iconv')) { dl('iconv' . 
$suffix); if (!@extension_loaded('iconv')) { echo $strCantLoadRecodeIconv; exit(); } } + $PMA_recoding_engine = 'iconv'; + } else { + if (@extension_loaded('iconv')) { + $PMA_recoding_engine = 'iconv'; + } elseif (@extension_loaded('recode')) { + $PMA_recoding_engine = 'recode'; + } else { + dl('iconv' . $suffix); + if (!@extension_loaded('iconv')) { + dl('recode' . $suffix); + if (!@extension_loaded('recode')) { + echo $strCantLoadRecodeIconv; + exit(); + } else { + $PMA_recoding_engine = 'recode'; + } + } else { + $PMA_recoding_engine = 'iconv'; + } + } } - } // end load mysql extension + } // end load recode/iconv extension + + define('PMA_CHARSET_NONE', 0); + define('PMA_CHARSET_ICONV', 1); + define('PMA_CHARSET_LIBICONV', 2); + define('PMA_CHARSET_RECODE', 3); + // finally detect which function will we use: + if (isset($cfg['AllowAnywhereRecoding']) + && $cfg['AllowAnywhereRecoding'] + && $allow_recoding) { + + if (!isset($PMA_recoding_engine)) $PMA_recoding_engine = $cfg['RecodingEngine']; + if ($PMA_recoding_engine == 'iconv') { + if (@function_exists('iconv')) { + $PMA_recoding_engine = PMA_CHARSET_ICONV; + } else if (@function_exists('libiconv')) { + $PMA_recoding_engine = PMA_CHARSET_LIBICONV; + } else { + $PMA_recoding_engine = PMA_CHARSET_NONE; + + if (!isset($GLOBALS['is_header_sent'])) { + include('./header.inc.php3'); + } + echo $strCantUseRecodeIconv; + include('./footer.inc.php3'); + exit(); + } + } elseif ($PMA_recoding_engine == 'recode') { + if (@function_exists('recode_string')) { + $PMA_recoding_engine = PMA_CHARSET_RECODE; + } else { + $PMA_recoding_engine = PMA_CHARSET_NONE; + + + if (!isset($GLOBALS['is_header_sent'])) { + include('./header.inc.php3'); + } + echo $strCantUseRecodeIconv; + include('./footer.inc.php3'); + exit(); + } + } else { + if (@function_exists('iconv')) { + $PMA_recoding_engine = PMA_CHARSET_ICONV; + } else if (@function_exists('libiconv')) { + $PMA_recoding_engine = PMA_CHARSET_LIBICONV; + } elseif 
(@function_exists('recode_string')) { + $PMA_recoding_engine = PMA_CHARSET_RECODE; + } else { + $PMA_recoding_engine = PMA_CHARSET_NONE; + + + if (!isset($GLOBALS['is_header_sent'])) { + include('./header.inc.php3'); + } + echo $strCantUseRecodeIconv; + include('./footer.inc.php3'); + exit(); + } + } + } else { + $PMA_recoding_engine = PMA_CHARSET_NONE; + } /** * Converts encoding according to current settings. @@ -68,8 +163,6 @@ if (!defined('PMA_CHARSET_CONVERSION_LIB_INCLUDED')){ $result = array(); reset($what); while(list($key, $val) = each($what)) { -//Debug: echo '['.$key.'] = ' . $val . '
'; - if (is_string($val) || is_array($val)) { if (is_string($key)) { $result[PMA_convert_display_charset($key)] = PMA_convert_display_charset($val); @@ -83,16 +176,18 @@ if (!defined('PMA_CHARSET_CONVERSION_LIB_INCLUDED')){ return $result; } else if (is_string($what)) { - if (@function_exists('iconv')) { -//Debug: echo 'PMA_convert_display_charset: ' . $what . '->' . iconv($convcharset, $charset, $what) . "\n
"; - return iconv($convcharset, $charset, $what); - } else if (@function_exists('libiconv')) { - return libiconv($convcharset, $charset, $what); - } else if (@function_exists('recode_string')) { - return recode_string($convcharset . '..' . $charset, $what); - } else { - echo $GLOBALS['strCantUseRecodeIconv']; - return $what; + switch ($GLOBALS['PMA_recoding_engine']) { + case PMA_CHARSET_RECODE: + return recode_string($convcharset . '..' . $charset, $what); + break; + case PMA_CHARSET_ICONV: + return iconv($convcharset, $charset, $what); + break; + case PMA_CHARSET_LIBICONV: + return libiconv($convcharset, $charset, $what); + break; + default: + return $what; } } else if (is_object($what)) { @@ -136,25 +231,99 @@ if (!defined('PMA_CHARSET_CONVERSION_LIB_INCLUDED')){ if (!(isset($cfg['AllowAnywhereRecoding']) && $cfg['AllowAnywhereRecoding'] && $allow_recoding)) { return $what; - } - else { - if (@function_exists('iconv')) { -//Debug: echo 'PMA_convert_charset: ' . $what . '->' . iconv($charset, $convcharset, $what) . "\n
"; - return iconv($charset, $convcharset, $what); - } else if (@function_exists('libiconv')) { - return libiconv($charset, $convcharset, $what); - } else if (@function_exists('recode_string')) { - return recode_string($charset . '..' . $convcharset, $what); - } else { - if (!isset($GLOBALS['is_header_sent'])) { - include('./header.inc.php3'); - } - echo $GLOBALS['strCantUseRecodeIconv']; - include('./footer.inc.php3'); - exit(); + } else { + switch ($GLOBALS['PMA_recoding_engine']) { + case PMA_CHARSET_RECODE: + return recode_string($charset . '..' . $convcharset, $what); + break; + case PMA_CHARSET_ICONV: + return iconv($charset, $convcharset, $what); + break; + case PMA_CHARSET_LIBICONV: + return libiconv($charset, $convcharset, $what); + break; + default: + return $what; } } } // end of the "PMA_convert_charset()" function + /** + * Converts encoding of text according to pametres with detected + * conversion function. + * + * @param string source charset + * @param string target charset + * @param string what to convert + * + * @return string converted text + * + * @access public + * + * @author nijel + */ + function PMA_convert_string($src_charset, $dest_charset, $what) { + switch ($GLOBALS['PMA_recoding_engine']) { + case PMA_CHARSET_RECODE: + return recode_string($src_charset . '..' . $dest_charset, $what); + break; + case PMA_CHARSET_ICONV: + return iconv($src_charset, $dest_charset, $what); + break; + case PMA_CHARSET_LIBICONV: + return libiconv($src_charset, $dest_charset, $what); + break; + default: + return $what; + } + } // end of the "PMA_convert_string()" function + + /** + * Converts encoding of file according to pametres with detected + * conversion function. The old file will be unlinked and new created and + * its file name is returned. 
+ * + * @param string source charset + * @param string target charset + * @param string file to convert + * + * @return string new temporay file + * + * @access public + * + * @author nijel + */ + function PMA_convert_file($src_charset, $dest_charset, $file) { + switch ($GLOBALS['PMA_recoding_engine']) { + case PMA_CHARSET_RECODE: + case PMA_CHARSET_ICONV: + case PMA_CHARSET_LIBICONV: + $tmpfname = tempnam('', 'PMA_convert_file'); + $fin = fopen($file, 'r'); + $fout = fopen($tmpfname, 'w'); + if ($GLOBALS['PMA_recoding_engine'] == PMA_CHARSET_RECODE) { + recode_file($src_charset . '..' . $dest_charset, $fin, $fout); + } else { + while (!feof($fin)) { + $line = fgets($fin, 4096); + if ($GLOBALS['PMA_recoding_engine'] == PMA_CHARSET_ICONV) { + $dist = iconv($src_charset, $dest_charset, $line); + } else { + $dist = libiconv($src_charset, $dest_charset, $line); + } + fputs($fout, $dist); + } // end while + } + fclose($fin); + fclose($fout); + unlink($file); + + return $tmpfname; + break; + default: + return $file; + } + } // end of the "PMA_convert_file()" function + } // $__PMA_CHARSET_CONVERSION_LIB__ ?> diff --git a/read_dump.php3 b/read_dump.php3 index 4491f454e..71a267caa 100644 --- a/read_dump.php3 +++ b/read_dump.php3 @@ -277,7 +277,7 @@ if ($sql_file != 'none') { // Convert the file's charset if necessary if ($cfg['AllowAnywhereRecoding'] && $allow_recoding && isset($charset_of_file) && $charset_of_file != $charset) { - $sql_query = iconv($charset_of_file, $charset, $sql_query); + $sql_query = PMA_convert_string($charset_of_file, $charset, $sql_query); } } // end uploaded file stuff } diff --git a/tbl_dump.php3 b/tbl_dump.php3 index c317c13b1..a4a9a69d4 100755 --- a/tbl_dump.php3 +++ b/tbl_dump.php3 @@ -18,6 +18,14 @@ function PMA_myHandler($sql_insert) if (function_exists('PMA_kanji_str_conv')) { $sql_insert = PMA_kanji_str_conv($sql_insert, $GLOBALS['knjenc'], isset($GLOBALS['xkana']) ? $GLOBALS['xkana'] : ''); } + + // Convert the charset if required. 
+ if ($GLOBALS['cfg']['AllowAnywhereRecoding'] && $GLOBALS['allow_recoding'] + && isset($GLOBALS['charset_of_file']) && $GLOBALS['charset_of_file'] != $GLOBALS['charset'] + && (!empty($GLOBALS['asfile']))) { + $sql_insert = PMA_convert_string($GLOBALS['charset'], $GLOBALS['charset_of_file'], $sql_insert); + } + // Defines the end of line delimiter to use $eol_dlm = (isset($GLOBALS['extended_ins']) && ($GLOBALS['current_row'] < $GLOBALS['rows_cnt'])) ? ',' @@ -59,6 +67,12 @@ function PMA_myCsvHandler($sql_insert) if (function_exists('PMA_kanji_str_conv')) { $sql_insert = PMA_kanji_str_conv($sql_insert, $GLOBALS['knjenc'], isset($GLOBALS['xkana']) ? $GLOBALS['xkana'] : ''); } + // Convert the charset if required. + if ($GLOBALS['cfg']['AllowAnywhereRecoding'] && $GLOBALS['allow_recoding'] + && isset($GLOBALS['charset_of_file']) && $GLOBALS['charset_of_file'] != $GLOBALS['charset'] + && (!empty($GLOBALS['asfile']))) { + $sql_insert = PMA_convert_string($GLOBALS['charset'], $GLOBALS['charset_of_file'], $sql_insert); + } // Result has to be displayed on screen if (empty($GLOBALS['asfile'])) { echo htmlspecialchars($sql_insert) . $add_character; @@ -131,9 +145,9 @@ if (empty($asfile)) { else { // Defines filename and extension, and also mime types if (!isset($table)) { - $filename = $db; + $filename = PMA_convert_string($convcharset, 'iso8859-1', $db); } else { - $filename = $table; + $filename = PMA_convert_string($charset, 'iso8859-1', $table); } if (isset($bzip) && $bzip == 'bzip') { $ext = 'bz2'; @@ -239,6 +253,12 @@ else { if (function_exists('PMA_kanji_str_conv')) { // Y.Kawada $dump_buffer = PMA_kanji_str_conv($dump_buffer, $knjenc, isset($xkana) ? $xkana : ''); } + // Convert the charset if required. 
+ if ($GLOBALS['cfg']['AllowAnywhereRecoding'] && $GLOBALS['allow_recoding'] + && isset($GLOBALS['charset_of_file']) && $GLOBALS['charset_of_file'] != $GLOBALS['charset'] + && (!empty($GLOBALS['asfile']))) { + $dump_buffer = PMA_convert_string($GLOBALS['charset'], $GLOBALS['charset_of_file'], $dump_buffer); + } // At least data if (($what == 'data') || ($what == 'dataonly')) { $tcmt = $crlf . '#' . $crlf @@ -246,6 +266,11 @@ else { . '#' . $crlf .$crlf; if (function_exists('PMA_kanji_str_conv')) { // Y.Kawada $dump_buffer .= PMA_kanji_str_conv($tcmt, $knjenc, isset($xkana) ? $xkana : ''); + } elseif ($GLOBALS['cfg']['AllowAnywhereRecoding'] && $GLOBALS['allow_recoding'] + // Convert the charset if required. + && isset($GLOBALS['charset_of_file']) && $GLOBALS['charset_of_file'] != $GLOBALS['charset'] + && (!empty($GLOBALS['asfile']))) { + $dump_buffer .= PMA_convert_string($GLOBALS['charset'], $GLOBALS['charset_of_file'], $tcmt); } else { $dump_buffer .= $tcmt; } diff --git a/tbl_properties_export.php3 b/tbl_properties_export.php3 index 61e2d8dc7..145f86b84 100755 --- a/tbl_properties_export.php3 +++ b/tbl_properties_export.php3 @@ -92,6 +92,27 @@ echo "\n"; + ' . "\n" + . ' ' . "\n"; + while ($temp_charset = next($cfg['AvailableCharsets'])) { + echo ' ' . "\n"; + } // end while + echo ' '; + } // end if + echo "\n"; + ?> = 40004) {