화요일, 8월 18, 2009

String Manipulation

// 한글, 영문, 숫자 등이 혼합된 긴 문자열을 원하는 byte 로 잘라보자.
// 다시 작성하기 귀찮아서 적어 둔다. -_-;
// 아래 주석이 핵심 이기에 잘 읽어 두길 바란다. 신경써서 작성 했음 ^^;

//! hjkim: Cut string(Korean+English+Numeric+SpecialCharacter) [
// Note:
// If the string is longer than the limit, cut it to fit and append "…".
// e.g., cutting "TEST한글123한" at 12 bytes would split the trailing 2-byte character,
// so the expected result is "TEST한글123".
//
#define MAX_LENGTH_STRING 26
{
unsigned char bufStrName[MAX_LENGTH_STRING+2+1] = { 0 }; // MAX Finename: 26, "…": 2
char* pStrName = "test한글ㅎ1234aodkjf023r미ㅓ알ㄴ이f3r";
int pStrNameLength = strlen( pStrName );
int i = 0;
int counter = MAX_LENGTH_STRING;
if( pStrName ) {
if( pStrNameLength > MAX_LENGTH_STRING ) {
memcpy( (void*)bufStrName, (void*)pStrName, MAX_LENGTH_STRING );
bufStrName[MAX_LENGTH_STRING] = '\0';

for( i = MAX_LENGTH_STRING-1; i >= 0; i-- ) {
if( !(bufStrName[i] & 0x80) )
// or if( (bufStrName[i] < 127) )
counter--;
}
memset( (void*)bufStrName, 0x00, sizeof(bufStrName) );
/*
Checks MSB and LSB 2 bytes Korean character.
2 bytes character is bigger than ASCII Decimal code 127.
So, we can know this character is 2 byte(MSB or LSB) character or ASCII how remnant divided total length by 2.
If remnant is zero then its 2 byte character (LSB; means combined) or ASCII code.
But remnant is not a zero then its 2 byte character (MSB; means not combined), so eliminates last byte.
*/
memcpy( (void*)bufStrName, (void*)pStrName, (MAX_LENGTH_STRING-(counter%2)) );
memcpy( (void*)(bufStrName+strlen((char*)bufStrName)), (void*)"…", strlen("…") );
fprintf( stdout, "%s\n", (char*)bufStrName );
}
else
fprintf( stdout, pStrName );
}
}
//! hjkim: Cut string(Korean+English+Numeric+SpecialCharacter) ]

// ---------------------------------------------
// Ascii <-> Unicode
// (Visual C++)
// ---------------------------------------------
int U2A(WCHAR* pUnicode, char* pAscii) {
 int length = WideCharToMultiByte( CP_ACP, 0, pUnicode, -1, NULL, 0, NULL, NULL );
 WideCharToMultiByte( CP_ACP, 0, pUnicode, -1, pAscii, length, 0, 0 );
 return length;
}
int A2U(char* pAscii, WCHAR *pUnicode) {
 int length = MultiByteToWideChar( CP_ACP, 0, (LPCTSTR)pAscii, -1, NULL, NULL );
 MultiByteToWideChar( CP_ACP, 0, (LPCTSTR)pAscii, -1, pUnicode, length );
 return length;
}
// ---------------------------------------------

-----
Cheers,
June

댓글 없음:

댓글 쓰기