A few years back, I was working on font conversion. This system converts text written in the PCS Nepali TTF font to Nepali Unicode. The main purpose of the project was to convert all of the archived Nepali voter lists. I hope this will help anyone who wants to explore font processing and data conversion.
Conversion Source :
001 /* 002 * To change this template, choose Tools | Templates 003 * and open the template in the editor. 004 */ 005 006 import java.util.*; 007 import java.util.ArrayList; 008 009 /** 010 * 011 * @author Santa 012 */ 013 public class unicodeConversion { 014 015 List<String> halfLetter = new ArrayList<String>(); 016 fileTokenizer ftok; 017 018 public unicodeConversion() { 019 ftok = new fileTokenizer(); 020 halfLetter = ftok.readFileText("halfLetter.txt"); 021 } 022 023 String getUnicode(char ch) { 024 switch (ch) { 025 case '0': 026 return ("०"); 027 case '1': 028 return ("१"); 029 case '2': 030 return ("२"); 031 case '3': 032 return ("३"); 033 case '4': 034 return ("४"); 035 case '5': 036 return ("५"); 037 case '6': 038 return ("६"); 039 case '7': 040 return ("७"); 041 case '8': 042 return ("८"); 043 case '9': 044 return ("९"); 045 case '~': 046 return ("ङ"); 047 case '`': 048 return ("ञ्"); 049 case '!': 050 return ("ज्ञ"); 051 case '@': 052 return ("ई"); 053 case '#': 054 return ("घ"); 055 case '$': 056 return ("द्ध"); 057 case '%': 058 return ("छ"); 059 case '^': 060 return ("ट"); 061 case '&': 062 return ("ठ"); 063 case '*': 064 return ("ड"); 065 case '(': 066 return ("ढ"); 067 case ')': 068 return ("ण्"); 069 case '-': 070 return ("("); 071 case '_': 072 return (")"); 073 case '+': 074 return ("ं"); 075 case '=': 076 return ("॰"); 077 case 'q': 078 return ("त्र"); 079 case 'w': 080 return ("ध"); 081 case 'e': 082 return ("भ"); 083 case 'r': 084 return ("च"); 085 case 't': 086 return ("त"); 087 case 'Q': 088 return ("त्त"); 089 case 'W': 090 return ("ध्"); 091 case 'E': 092 return ("भ्"); 093 case 'R': 094 return ("च्"); 095 case 'T': 096 return ("त्"); 097 case 'p': 098 return ("उ"); 099 case 'o': 100 return ("य"); 101 case 'i': 102 return ("ष्"); 103 case 'u': 104 return ("ग"); 105 case 'y': 106 return ("थ"); 107 case 'P': 108 return ("ए"); 109 case 'O': 110 return ("इ"); 111 case 'I': 112 return ("क्ष्"); 113 case 'U': 114 return ("ग्"); 115 case 'Y': 116 
return ("थ्"); 117 case '[': 118 return ("ृ"); 119 case '{': 120 return ("र्"); 121 case ']': 122 return ("े"); 123 case '}': 124 return ("ै"); 125 case 'a': 126 return ("ब"); 127 case 's': 128 return ("क"); 129 case 'd': 130 return ("म"); 131 case 'f': 132 return ("ा"); 133 case 'g': 134 return ("न"); 135 case 'A': 136 return ("ब्"); 137 case 'S': 138 return ("क्"); 139 case 'D': 140 return ("म्"); 141 case 'F': 142 return ("ँ"); 143 case 'G': 144 return ("न्"); 145 case ';': 146 return ("स"); 147 case 'l': 148 return ("ि"); 149 case 'k': 150 return ("प"); 151 case 'j': 152 return ("व"); 153 case 'h': 154 return ("ज"); 155 case ':': 156 return ("स्"); 157 case 'L': 158 return ("ी"); 159 case 'K': 160 return ("प्"); 161 case 'J': 162 return ("व्"); 163 case 'H': 164 return ("ज्"); 165 case 'z': 166 return ("श"); 167 case 'x': 168 return ("ह"); 169 case 'c': 170 return ("अ"); 171 case 'v': 172 return ("ख"); 173 case 'Z': 174 return ("श्"); 175 case 'X': 176 return ("ह्"); 177 //case 'C': 178 //return(""); 179 case 'V': 180 return ("ख्"); 181 case '/': 182 return ("र"); 183 case '.': 184 return ("।"); 185 case ',': 186 return ("‚"); 187 //case 'm': 188 //return(""); 189 case 'n': 190 return ("ल"); 191 case 'b': 192 return ("द"); 193 case '?': 194 return ("रू"); 195 case '>': 196 return ("श्र"); 197 //case '<': 198 //return(""); 199 case 'M': 200 return ("ः"); 201 case 'N': 202 return ("ल्"); 203 case 'B': 204 return ("द्य"); 205 206 //Special Characters 207 //case '|': 208 //return ("र्"); 209 case '¤': 210 return ("ँ"); 211 case 'ñ': 212 return ("ङ"); 213 case '\'': 214 return ("ु"); 215 case '\"': 216 return ("ू"); 217 218 case 161: 219 return("ज्ञ्"); 220 case 162: 221 return("द्ध"); 222 case 163: 223 return("घ्"); 224 case 165: 225 return("ऋ"); 226 case 167: 227 return("ट्ट"); 228 case 169: 229 return("?"); 230 case 170: 231 return("ञ"); 232 case 174: 233 return("+"); 234 case 176: 235 return("ङ्क"); 236 case 180: 237 return("झ"); 238 case 183: 239 return("ठ्ठ"); 
240 case 191: 241 return("रु"); 242 case 198: 243 return("\""); 244 case 210: 245 return("ू"); 246 case 212: 247 return("क्ष"); 248 //case 216: 249 //return(""); 250 case 217: 251 return("ह"); 252 case 218: 253 return("ु"); 254 case 223: 255 return("द्म"); 256 case 229: 257 return("द्व"); 258 case 230: 259 return("\""); 260 case 231: 261 return("ॐ"); 262 case 233: 263 return("ङ्ग"); 264 case 237: 265 return("ष"); 266 case 247: 267 return("/"); 268 case 248: 269 return("य्"); 270 case 250: 271 return("ू"); 272 273 default: 274 return (ch + ""); 275 } 276 } 277 278 private boolean isHalf(String s) { 279 if (halfLetter.contains(s)) { 280 return (true); 281 } else { 282 return (false); 283 } 284 } 285 286 private boolean isToLookNext(char ch) { 287 switch (ch) { 288 case 'e'://भ 289 case 'k'://प 290 case 'i'://ष् 291 case 'I'://क्ष् 292 case 'f'://ा 293 case 'Q'://त्त 294 return (true); 295 default: 296 return (false); 297 } 298 } 299 300 private boolean isToLookPrevious(char ch) { 301 switch (ch) { 302 case '|': 303 return (true); 304 default: 305 return (false); 306 } 307 } 308 309 private boolean isAakar(char ch) { 310 switch (ch) { 311 case 'ा': 312 case 'ु': 313 case 'ू': 314 case 'ो': 315 case 'ौ': 316 case 'ी': 317 case 'ि': 318 case 'े': 319 case 'ै': 320 return (true); 321 default: 322 return (false); 323 } 324 } 325 326 private StringBuffer insertHalfRa(StringBuffer data) { 327 char ch = data.charAt(data.length() - 1); 328 try { 329 if (isAakar(ch)) { 330 data.insert(data.length() - 2, "र्"); 331 } else { 332 data.insert(data.length() - 1, "र्"); 333 } 334 } catch (Exception ee) { 335 //data.insert(data.length()-1, "र्"); 336 } 337 return (data); 338 } 339 340 private String processHalfRa(StringBuffer data) { 341 StringBuffer result = new StringBuffer(); 342 int i = 0; 343 char ch, ch1; 344 while (i < data.length()) { 345 ch = data.charAt(i); 346 if (ch == 'र') { 347 i++; 348 try { 349 ch1 = data.charAt(i); 350 if (ch1 == '्') { 351 //Inserting it into 
Previous Character 352 result = insertHalfRa(result); 353 } else { 354 result.append(ch); 355 i--; 356 } 357 } catch (Exception ee) { 358 result.append(ch); 359 i--; 360 } 361 } else { 362 result.append(ch); 363 } 364 i++; 365 } 366 return (result.toString()); 367 } 368 369 public String convertToUnicode(String s) { 370 String result = ""; 371 int i; 372 char ch, ch1, ch2; 373 boolean iflag = false; 374 String tmp; 375 i = 0; 376 while (i < s.length()) { 377 try { 378 ch = s.charAt(i); 379 //Special अ process 380 if (ch == 'c') { 381 i++; 382 ch1 = s.charAt(i); 383 if (ch1 == 'f') { 384 if (i + 1 < s.length()) { 385 i++; 386 ch2 = s.charAt(i); 387 if (ch2 == ']') { 388 result += "ओ"; 389 } else if (ch2 == '}') { 390 result += "औ"; 391 } else { 392 i--; 393 result += "आ"; 394 } 395 } 396 } else { 397 result += "अ"; 398 i--; 399 } 400 } //Looking Next Char 401 else if (isToLookNext(ch)) { 402 i++; 403 ch1 = s.charAt(i); 404 if (ch == 'e' && ch1 == 'm') { 405 result += "झ"; 406 if (iflag) { 407 result += "ि"; 408 iflag = false; 409 } 410 } else if (ch == 'k' && ch1 == 'm') { 411 result += "फ"; 412 if (iflag) { 413 result += "ि"; 414 iflag = false; 415 } 416 } else if (ch == 'Q' && ch1 == 'm') { 417 result += "क्त"; 418 if (iflag) { 419 result += "ि"; 420 iflag = false; 421 } 422 } else if (ch == 'i' && ch1 == 'f') { 423 result += "ष"; 424 if (iflag) { 425 result += "ि"; 426 iflag = false; 427 } 428 } else if (ch == 'I' && ch1 == 'f') { 429 result += "क्ष"; 430 if (iflag) { 431 result += "ि"; 432 iflag = false; 433 } 434 } else if (ch == 'f' && ch1 == ']') { 435 result += "ो"; 436 } else if (ch == 'f' && ch1 == '}') { 437 result += "ौ"; 438 } else { 439 i--; 440 tmp = getUnicode(ch); 441 result += tmp; 442 if (iflag) { 443 if (!isHalf(tmp)) { 444 result += "ि"; 445 iflag = false; 446 } 447 } 448 } 449 } else if (isToLookPrevious(ch)) { 450 result += "्"; 451 result += "र"; 452 if (iflag) { 453 result += "ि"; 454 iflag = false; 455 } 456 } else { 457 if (ch == 'l') { 
458 iflag = true; 459 } else { 460 tmp = getUnicode(ch); 461 result += tmp; 462 try { 463 ch1 = s.charAt(i + 1); 464 if (ch1 != '|') { 465 if (iflag) { 466 if (!isHalf(tmp)) { 467 result += "ि"; 468 iflag = false; 469 } 470 } 471 } 472 } catch (Exception ee) { 473 } 474 } 475 } 476 } catch (Exception ee) { 477 i--; 478 ch = s.charAt(i); 479 tmp = getUnicode(ch); 480 result += tmp; 481 } 482 i++; 483 } 484 result = processHalfRa(new StringBuffer(result)); 485 return (result); 486 } 487 } |
In the above source, "halfLetter.txt" contains the representations of all the half characters of Nepali text.
Usage :
String Output = new unicodeConversion().convertToUnicode(src);
Cheers !
Santa
No comments:
Post a Comment