View Issue Details

IDProjectCategoryView StatusLast Update
0001277OpenMPT[All Projects] libopenmptpublic2019-11-01 12:20
Reportermanx Assigned Tomanx  
PrioritynormalSeverityminorReproducibilityhave not tried
Status resolvedResolutionfixed 
Product VersionOpenMPT 1.29.00.* (current testing) 
Target VersionOpenMPT 1.29 / libopenmpt 0.5 (goals)Fixed in VersionOpenMPT 1.29 / libopenmpt 0.5 (goals) 
Summary0001277: remove iconv support
Description

mpt::String currently supports 3 means of character set conversions: WIN32, internal table based conversion, and iconv. Due to availability, iconv is only used on Linux by default (where it is included in glibc). However, iconv does not provide as any actual benefits compared to internal table based conversions. To the contrary, it increases the testing matrix, introduces less-tested code paths, complicates the already pretty convoluted character set conversion code paths, and thereby introduces maintenance burden.

TagsNo tags attached.
Has the bug occurred in previous versions?
Tested code revision (in case you know it)

Activities

manx

manx

2019-11-01 10:39

administrator  

remove-iconv-v1.patch (13,157 bytes)
Index: build/make/config-defaults.mk
===================================================================
--- build/make/config-defaults.mk	(revision 12273)
+++ build/make/config-defaults.mk	(working copy)
@@ -7,9 +7,6 @@
 # Mac OS X overrides
 DYNLINK=0
 SHARED_SONAME=0
-# when using iconv
-#CPPFLAGS += -DMPT_WITH_ICONV
-#LDLIBS += -liconv
 
 else ifeq ($(HOST_FLAVOUR),LINUX)
 
Index: common/BuildSettings.h
===================================================================
--- common/BuildSettings.h	(revision 12273)
+++ common/BuildSettings.h	(working copy)
@@ -92,7 +92,6 @@
 // OpenMPT and libopenmpt dependencies (not for openmp123, player plugins or examples)
 //#define MPT_WITH_DL
 #define MPT_WITH_FLAC
-//#define MPT_WITH_ICONV
 //#define MPT_WITH_LTDL
 #if MPT_OS_WINDOWS
 #define MPT_WITH_MEDIAFOUNDATION
@@ -124,7 +123,6 @@
 
 //#define MPT_WITH_DL
 //#define MPT_WITH_FLAC
-//#define MPT_WITH_ICONV
 //#define MPT_WITH_LTDL
 //#define MPT_WITH_MEDIAFOUNDATION
 #define MPT_WITH_MINIMP3
@@ -140,7 +138,6 @@
 
 //#define MPT_WITH_DL
 //#define MPT_WITH_FLAC
-//#define MPT_WITH_ICONV
 //#define MPT_WITH_LTDL
 //#define MPT_WITH_MEDIAFOUNDATION
 //#define MPT_WITH_MINIMP3
@@ -171,7 +168,6 @@
 
 //#define MPT_WITH_DL
 //#define MPT_WITH_FLAC
-//#define MPT_WITH_ICONV
 //#define MPT_WITH_LTDL
 //#define MPT_WITH_MEDIAFOUNDATION
 //#define MPT_WITH_MINIMP3
@@ -317,7 +313,7 @@
 
 #elif MPT_OS_LINUX
 
-	#define MPT_CHARSET_ICONV
+	#define MPT_CHARSET_INTERNAL
 
 #elif MPT_OS_ANDROID
 
@@ -332,17 +328,7 @@
 
 #elif MPT_OS_MACOSX_OR_IOS
 
-	#if defined(MPT_WITH_ICONV)
-		#define MPT_CHARSET_ICONV
-		#ifndef MPT_ICONV_NO_WCHAR
-		#define MPT_ICONV_NO_WCHAR
-		#endif
-	#else
-		#define MPT_CHARSET_INTERNAL
-	#endif
-	//#ifndef MPT_LOCALE_ASSUME_CHARSET
-	//#define MPT_LOCALE_ASSUME_CHARSET CharsetUTF8
-	//#endif
+	#define MPT_CHARSET_INTERNAL
 
 #elif MPT_OS_DJGPP
 
@@ -351,10 +337,6 @@
 	#define MPT_LOCALE_ASSUME_CHARSET CharsetCP437
 	#endif
 
-#elif defined(MPT_WITH_ICONV)
-
-	#define MPT_CHARSET_ICONV
-
 #endif
 
 
@@ -498,7 +480,7 @@
 #define MPT_ENABLE_TEMPFILE
 #endif
 
-#if !defined(MPT_CHARSET_WIN32) && !defined(MPT_CHARSET_ICONV) && !defined(MPT_CHARSET_INTERNAL)
+#if !defined(MPT_CHARSET_WIN32) && !defined(MPT_CHARSET_INTERNAL)
 #define MPT_CHARSET_INTERNAL
 #endif
 
Index: common/mptString.cpp
===================================================================
--- common/mptString.cpp	(revision 12274)
+++ common/mptString.cpp	(working copy)
@@ -32,12 +32,7 @@
 #include <windows.h>
 #endif
 
-#if defined(MPT_CHARSET_ICONV)
-#include <errno.h>
-#include <iconv.h>
-#endif
 
-
 OPENMPT_NAMESPACE_BEGIN
 
 
@@ -1057,88 +1052,9 @@
 
 #endif // MPT_CHARSET_WIN32
 
-#if defined(MPT_CHARSET_ICONV)
 
-static const char * CharsetToString(Charset charset)
-{
-	switch(charset)
-	{
-#if defined(MPT_ENABLE_CHARSET_LOCALE)
-		case CharsetLocale:      return "";            break; // "char" breaks with glibc when no locale is set
-#endif
-		case CharsetUTF8:        return "UTF-8";       break;
-		case CharsetASCII:       return "ASCII";       break;
-		case CharsetISO8859_1:   return "ISO-8859-1";  break;
-		case CharsetISO8859_15:  return "ISO-8859-15"; break;
-		case CharsetCP437:       return "CP437";       break;
-		case CharsetCP437AMS:    return "CP437";       break; // fallback, should not happen
-		case CharsetCP437AMS2:   return "CP437";       break; // fallback, should not happen
-		case CharsetWindows1252: return "CP1252";      break;
-	}
-	return 0;
-}
-
-static const char * CharsetToStringTranslit(Charset charset)
-{
-	switch(charset)
-	{
-#if defined(MPT_ENABLE_CHARSET_LOCALE)
-		case CharsetLocale:      return "//TRANSLIT";            break; // "char" breaks with glibc when no locale is set
-#endif
-		case CharsetUTF8:        return "UTF-8//TRANSLIT";       break;
-		case CharsetASCII:       return "ASCII//TRANSLIT";       break;
-		case CharsetISO8859_1:   return "ISO-8859-1//TRANSLIT";  break;
-		case CharsetISO8859_15:  return "ISO-8859-15//TRANSLIT"; break;
-		case CharsetCP437:       return "CP437//TRANSLIT";       break;
-		case CharsetCP437AMS:    return "CP437//TRANSLIT";       break; // fallback, should not happen
-		case CharsetCP437AMS2:   return "CP437//TRANSLIT";       break; // fallback, should not happen
-		case CharsetWindows1252: return "CP1252//TRANSLIT";      break;
-	}
-	return 0;
-}
-
-static const char * Charset_widechar()
-{
-	#if !defined(MPT_ICONV_NO_WCHAR) && !defined(MPT_COMPILER_QUIRK_NO_WCHAR)
-		return "wchar_t";
-	#else // MPT_ICONV_NO_WCHAR
-		// iconv on OSX does not handle wchar_t if no locale is set
-		static_assert(sizeof(widechar) == 2 || sizeof(widechar) == 4);
-		if(sizeof(widechar) == 2)
-		{
-			// "UTF-16" generates BOM
-			MPT_MAYBE_CONSTANT_IF(mpt::endian_is_little())
-			{
-				return "UTF-16LE";
-			}
-			MPT_MAYBE_CONSTANT_IF(mpt::endian_is_big())
-			{
-				return "UTF-16BE";
-			}
-			return "UTF-16";
-		} else if(sizeof(widechar) == 4)
-		{
-			// "UTF-32" generates BOM
-			MPT_MAYBE_CONSTANT_IF(mpt::endian_is_little())
-			{
-				return "UTF-32LE";
-			}
-			MPT_MAYBE_CONSTANT_IF(mpt::endian_is_big())
-			{
-				return "UTF-32BE";
-			}
-			return "UTF-32";
-		}
-	#endif // !MPT_ICONV_NO_WCHAR | MPT_ICONV_NO_WCHAR
-}
-
-#endif // MPT_CHARSET_ICONV
-
-
-#if !defined(MPT_CHARSET_ICONV)
 template<typename Tdststring>
 static Tdststring EncodeImplFallback(Charset charset, const widestring &src);
-#endif // !MPT_CHARSET_ICONV
 
 // templated on 8bit strings because of type-safe variants
 template<typename Tdststring>
@@ -1176,43 +1092,6 @@
 		WideCharToMultiByte(codepage, 0, src.c_str(), -1, encoded_string.data(), required_size, nullptr, nullptr);
 		encoded_string.resize(encoded_string.size() - 1); // remove \0
 		return encoded_string;
-	#elif defined(MPT_CHARSET_ICONV)
-		iconv_t conv = iconv_t();
-		conv = iconv_open(CharsetToStringTranslit(charset), Charset_widechar());
-		if(!conv)
-		{
-			conv = iconv_open(CharsetToString(charset), Charset_widechar());
-			if(!conv)
-			{
-				throw std::runtime_error("iconv conversion not working");
-			}
-		}
-		std::vector<widechar> wide_string(src.c_str(), src.c_str() + src.length() + 1);
-		std::vector<char> encoded_string(wide_string.size() * 8); // large enough
-		char * inbuf = reinterpret_cast<char*>(wide_string.data());
-		size_t inbytesleft = wide_string.size() * sizeof(widechar);
-		char * outbuf = encoded_string.data();
-		size_t outbytesleft = encoded_string.size();
-		while(iconv(conv, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == static_cast<size_t>(-1))
-		{
-			if(errno == EILSEQ || errno == EILSEQ)
-			{
-				inbuf += sizeof(widechar);
-				inbytesleft -= sizeof(widechar);
-				outbuf[0] = '?';
-				outbuf++;
-				outbytesleft--;
-				iconv(conv, NULL, NULL, NULL, NULL); // reset state
-			} else
-			{
-				iconv_close(conv);
-				conv = iconv_t();
-				return Tdststring();
-			}
-		}
-		iconv_close(conv);
-		conv = iconv_t();
-		return reinterpret_cast<const typename Tdststring::value_type*>(encoded_string.data());
 	#else
 		return EncodeImplFallback<Tdststring>(charset, src);
 	#endif
@@ -1219,7 +1098,6 @@
 }
 
 
-#if !defined(MPT_CHARSET_ICONV)
 template<typename Tdststring>
 static Tdststring EncodeImplFallback(Charset charset, const widestring &src)
 {
@@ -1252,13 +1130,10 @@
 		}
 		return Tdststring(out.begin(), out.end());
 }
-#endif // !MPT_CHARSET_ICONV
 
 
-#if !defined(MPT_CHARSET_ICONV)
 template<typename Tsrcstring>
 static widestring DecodeImplFallback(Charset charset, const Tsrcstring &src);
-#endif // !MPT_CHARSET_ICONV
 
 // templated on 8bit strings because of type-safe variants
 template<typename Tsrcstring>
@@ -1297,50 +1172,11 @@
 		MultiByteToWideChar(codepage, 0, reinterpret_cast<const char*>(src.c_str()), -1, decoded_string.data(), required_size);
 		decoded_string.resize(decoded_string.size() - 1); // remove \0
 		return decoded_string;
-	#elif defined(MPT_CHARSET_ICONV)
-		iconv_t conv = iconv_t();
-		conv = iconv_open(Charset_widechar(), CharsetToString(charset));
-		if(!conv)
-		{
-			throw std::runtime_error("iconv conversion not working");
-		}
-		std::vector<char> encoded_string(reinterpret_cast<const char*>(src.c_str()), reinterpret_cast<const char*>(src.c_str()) + src.length() + 1);
-		std::vector<widechar> wide_string(encoded_string.size() * 8); // large enough
-		char * inbuf = encoded_string.data();
-		size_t inbytesleft = encoded_string.size();
-		char * outbuf = reinterpret_cast<char*>(wide_string.data());
-		size_t outbytesleft = wide_string.size() * sizeof(widechar);
-		while(iconv(conv, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == static_cast<size_t>(-1))
-		{
-			if(errno == EILSEQ || errno == EILSEQ)
-			{
-				inbuf++;
-				inbytesleft--;
-				for(std::size_t i = 0; i < sizeof(widechar); ++i)
-				{
-					outbuf[i] = 0;
-				}
-				widechar tmp = 0xfffd;
-				std::memcpy(outbuf, &tmp, sizeof(widechar));
-				outbuf += sizeof(widechar);
-				outbytesleft -= sizeof(widechar);
-				iconv(conv, NULL, NULL, NULL, NULL); // reset state
-			} else
-			{
-				iconv_close(conv);
-				conv = iconv_t();
-				return widestring();
-			}
-		}
-		iconv_close(conv);
-		conv = iconv_t();
-		return wide_string.data();
 	#else
 		return DecodeImplFallback<Tsrcstring>(charset, src);
 	#endif
 }
 
-#if !defined(MPT_CHARSET_ICONV)
 template<typename Tsrcstring>
 static widestring DecodeImplFallback(Charset charset, const Tsrcstring &src)
 {
@@ -1374,7 +1210,6 @@
 		}
 		return out;
 }
-#endif // !MPT_CHARSET_ICONV
 
 
 // templated on 8bit strings because of type-safe variants
@@ -1389,50 +1224,7 @@
 		const typename Tsrcstring::value_type * src_end = src_beg + src.size();
 		return Tdststring(reinterpret_cast<const typename Tdststring::value_type *>(src_beg), reinterpret_cast<const typename Tdststring::value_type *>(src_end));
 	}
-	#if defined(MPT_CHARSET_ICONV)
-		if(to == CharsetCP437AMS || to == CharsetCP437AMS2 || from == CharsetCP437AMS || from == CharsetCP437AMS2)
-		{
-			return EncodeImpl<Tdststring>(to, DecodeImpl(from, src));
-		}
-		iconv_t conv = iconv_t();
-		conv = iconv_open(CharsetToStringTranslit(to), CharsetToString(from));
-		if(!conv)
-		{
-			conv = iconv_open(CharsetToString(to), CharsetToString(from));
-			if(!conv)
-			{
-				throw std::runtime_error("iconv conversion not working");
-			}
-		}
-		std::vector<char> src_string(reinterpret_cast<const char*>(src.c_str()), reinterpret_cast<const char*>(src.c_str()) + src.length() + 1);
-		std::vector<char> dst_string(src_string.size() * 8); // large enough
-		char * inbuf = src_string.data();
-		size_t inbytesleft = src_string.size();
-		char * outbuf = dst_string.data();
-		size_t outbytesleft = dst_string.size();
-		while(iconv(conv, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == static_cast<size_t>(-1))
-		{
-			if(errno == EILSEQ || errno == EILSEQ)
-			{
-				inbuf++;
-				inbytesleft--;
-				outbuf[0] = '?';
-				outbuf++;
-				outbytesleft--;
-				iconv(conv, NULL, NULL, NULL, NULL); // reset state
-			} else
-			{
-				iconv_close(conv);
-				conv = iconv_t();
-				return Tdststring();
-			}
-		}
-		iconv_close(conv);
-		conv = iconv_t();
-		return reinterpret_cast<const typename Tdststring::value_type*>(dst_string.data());
-	#else
-		return EncodeImpl<Tdststring>(to, DecodeImpl(from, src));
-	#endif
+	return EncodeImpl<Tdststring>(to, DecodeImpl(from, src));
 }
 
 
Index: common/version.cpp
===================================================================
--- common/version.cpp	(revision 12273)
+++ common/version.cpp	(working copy)
@@ -320,9 +320,6 @@
 		#if defined(MPT_CHARSET_WIN32)
 			UL_(" +WINAPI")
 		#endif
-		#if defined(MPT_CHARSET_ICONV)
-			UL_(" +ICONV")
-		#endif
 		#if defined(MPT_CHARSET_INTERNAL)
 			UL_(" +INTERNALCHARSETS")
 		#endif
Index: libopenmpt/dox/changelog.md
===================================================================
--- libopenmpt/dox/changelog.md	(revision 12273)
+++ libopenmpt/dox/changelog.md	(working copy)
@@ -11,6 +11,7 @@
     supported.
 
  *  [**Change**] std::istream based file I/O has been speed up.
+ *  [**Change**] Dependency on iconv on Linux has been removed.
 
  *  [**Regression**] foo_openmpt: foo_openmpt is discontinued. Please use
     Kode54's fork foo_openmpt54:
@@ -25,6 +26,8 @@
  *  [**Regression**] Building with Android NDK older than NDK r18b is not
     supported any more.
  *  [**Regression**] Windows XP and Windows Vista are no longer supported.
+ *  [**Regression**] It is no longer possible to optionally use iconv for
+    character set conversions.
 
 ### libopenmpt 0.4.0
 
Index: libopenmpt/dox/dependencies.md
===================================================================
--- libopenmpt/dox/dependencies.md	(revision 12273)
+++ libopenmpt/dox/dependencies.md	(working copy)
@@ -93,11 +93,6 @@
 
 ### libopenmpt
 
- *  Character set conversion can use one of:
-     *  **Win32**
-     *  **iconv**
-     *  **C++11** codecvt_utf8
-    instead of the internal conversion tables and functions.
  *  **doxygen 1.8** or higher is required to build the documentation.
 
 ### openmpt123
remove-iconv-v1.patch (13,157 bytes)
manx

manx

2019-11-01 10:47

administrator   ~0004127

Also, the current approach on implicitly defaulting to iconv is probably problematic on embedded linux system which use a different libc than glibc.

manx

manx

2019-11-01 12:20

administrator   ~0004128

r12276

Issue History

Date Modified Username Field Change
2019-11-01 10:39 manx New Issue
2019-11-01 10:39 manx Status new => assigned
2019-11-01 10:39 manx Assigned To => manx
2019-11-01 10:39 manx File Added: remove-iconv-v1.patch
2019-11-01 10:47 manx Note Added: 0004127
2019-11-01 10:56 manx Description Updated View Revisions
2019-11-01 12:20 manx Status assigned => resolved
2019-11-01 12:20 manx Resolution open => fixed
2019-11-01 12:20 manx Fixed in Version => OpenMPT 1.29 / libopenmpt 0.5 (goals)
2019-11-01 12:20 manx Note Added: 0004128