Commit f466203067aada0a8bfd8c39267028ac9a54245c

Now the tests for Amarok::cleanPath() pass.
This required some Unicode magic. In some normalized Unicode forms, accents are separate (combining) characters. So after normalizing we can simply remove them, which should catch all kinds of accented characters.
src/App.cpp
(18 / 39)
  
916916
917917 QString cleanPath( const QString &path )
918918 {
919 /* Unicode uses combining characters to form accented versions of other characters.
920 * (Exception: Latin-1 table for compatibility with ASCII.)
921 * Those can be found in the Unicode tables listed at:
922 * http://en.wikipedia.org/w/index.php?title=Combining_character&oldid=255990982
923 * Removing those characters removes accents. :) */
919924 QString result = path;
920 // german umlauts
925
926 // German umlauts
921927 result.replace( QChar(0x00e4), "ae" ).replace( QChar(0x00c4), "Ae" );
922928 result.replace( QChar(0x00f6), "oe" ).replace( QChar(0x00d6), "Oe" );
923929 result.replace( QChar(0x00fc), "ue" ).replace( QChar(0x00dc), "Ue" );
924930 result.replace( QChar(0x00df), "ss" );
925931
926 // some strange accents
927 result.replace( QChar(0x00e7), "c" ).replace( QChar(0x00c7), "C" );
928 result.replace( QChar(0x00fd), "y" ).replace( QChar(0x00dd), "Y" );
929 result.replace( QChar(0x00f1), "n" ).replace( QChar(0x00d1), "N" );
932 // other special cases
933 result.replace( QChar(0x00C6), "AE" );
934 result.replace( QChar(0x00E6), "ae" );
930935
931 // czech letters with carons
932 result.replace( QChar(0x0161), "s" ).replace( QChar(0x0160), "S" );
933 result.replace( QChar(0x010d), "c" ).replace( QChar(0x010c), "C" );
934 result.replace( QChar(0x0159), "r" ).replace( QChar(0x0158), "R" );
935 result.replace( QChar(0x017e), "z" ).replace( QChar(0x017d), "Z" );
936 result.replace( QChar(0x0165), "t" ).replace( QChar(0x0164), "T" );
937 result.replace( QChar(0x0148), "n" ).replace( QChar(0x0147), "N" );
938 result.replace( QChar(0x010f), "d" ).replace( QChar(0x010e), "D" );
936 result.replace( QChar(0x00D8), "OE" );
937 result.replace( QChar(0x00F8), "oe" );
939938
940 // accented vowels
941 QChar a[] = { 'a', 0xe0,0xe1,0xe2,0xe3,0xe5, 0 };
942 QChar A[] = { 'A', 0xc0,0xc1,0xc2,0xc3,0xc5, 0 };
943 QChar E[] = { 'e', 0xe8,0xe9,0xea,0xeb,0x11a, 0 };
944 QChar e[] = { 'E', 0xc8,0xc9,0xca,0xcb,0x11b, 0 };
945 QChar i[] = { 'i', 0xec,0xed,0xee,0xef, 0 };
946 QChar I[] = { 'I', 0xcc,0xcd,0xce,0xcf, 0 };
947 QChar o[] = { 'o', 0xf2,0xf3,0xf4,0xf5,0xf8, 0 };
948 QChar O[] = { 'O', 0xd2,0xd3,0xd4,0xd5,0xd8, 0 };
949 QChar u[] = { 'u', 0xf9,0xfa,0xfb,0x16e, 0 };
950 QChar U[] = { 'U', 0xd9,0xda,0xdb,0x16f, 0 };
951 QChar nul[] = { 0 };
952 QChar *replacements[] = { a, A, e, E, i, I, o, O, u, U, nul };
939 // normalize to a form where accents are separate characters
940 result = result.normalized( QString::NormalizationForm_D );
953941
954 for( int i = 0; i < result.length(); i++ )
942 // remove accents from table "Combining Diacritical Marks"
943 for( int i = 0x0300; i <= 0x036F; i++ )
955944 {
956 QChar c = result[ i ];
957 for( uint n = 0; replacements[n][0] != QChar(0); n++ )
958 {
959 for( uint k=0; replacements[n][k] != QChar(0); k++ )
960 {
961 if( replacements[n][k] == c )
962 {
963 c = replacements[n][0];
964 }
965 }
966 }
967 result[ i ] = c;
945 result.remove( QChar( i ) );
968946 }
947
969948 return result;
970949 }
971950
  
7979 QCOMPARE( Amarok::cleanPath( QString( "ÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ" ) ), QString( "ACDEEINORSTUUYZ" ) );
8080
8181 /* Scandinavian languages */
82 QCOMPARE( Amarok::cleanPath( QString( "åø" ) ), QString( "ao" ) );
83 QCOMPARE( Amarok::cleanPath( QString( "ÅØ" ) ), QString( "AO" ) );
82 QCOMPARE( Amarok::cleanPath( QString( "åø" ) ), QString( "aoe" ) );
83 QCOMPARE( Amarok::cleanPath( QString( "ÅØ" ) ), QString( "AOE" ) );
8484
8585 /* Spanish */
8686 QCOMPARE( Amarok::cleanPath( QString( "ñóÿ" ) ), QString( "noy" ) );
8787 QCOMPARE( Amarok::cleanPath( QString( "ÑÓŸ" ) ), QString( "NOY" ) );
8888
89 /* if they exist: add missing ones here */
89 /* add missing ones here */
9090}
9191
9292void TestAmarok::testComputeScore()