Prev: Whither JDK 7?
Next: writing to DVD
From: moonhkt on 26 May 2010 00:41 Hi All I want to display Charset for GB2312 character byte value. Using codePointAt(i) just for Unicode code. How to display GB2312 byte value ? For 测 should be b2e2 For 试 should be cad4 cat temp.txt TEST1|测试1 TEST2|测试2 TEST3|测试3 import java.io.*; public class Readfile00 { public static void main(String[] args) throws UnsupportedEncodingException { String icp = "GB2312"; String ifn = "temp.txt"; PrintStream sysout = new PrintStream(System.out, true, icp); try { File oFile = new File("out_utf.text"); BufferedWriter out = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(oFile),icp)); String a = "" ; char ch ; Integer val=0; String hexstr = ""; BufferedReader in = new BufferedReader( new InputStreamReader(new FileInputStream(ifn), icp)); while (( a = in.readLine()) != null) { int n = a.length(); sysout.println(a); sysout.printf("Length of string is %d%n", n); sysout.printf("CodePoints in string is %d%n", a.codePointCount(0,n)); for (int i = 0; i < n; i++) { ch = a.charAt(i); val = a.codePointAt(i); // Return Unicode code point hexstr = Integer.toHexString(val); Character.UnicodeBlock block = Character.UnicodeBlock.of(ch); sysout.printf("Chr[%4d] DEC=%5s UTF-16=%5s is %4s %s %n", i, val, hexstr, a.charAt(i),block); //byte bv = (byte) ch; //System.out.println("Byte is:=" + bv); out.write(a.charAt(i)); } out.newLine(); } out.close() ; System.out.printf("File %s\n", oFile); } catch (IOException e) { System.out.println(e); } } }
From: RedGrittyBrick on 26 May 2010 16:13 On 26/05/2010 05:41, moonhkt wrote: > Hi All > > > I want to display Charset for GB2312 character byte value. Using > codePointAt(i) just for Unicode code. > How to diaplay GB2123 byte value ? > > For 测 should be b2e2 > For 试 should be cad4 > > cat temp.txt > > TEST1|测试1 > TEST2|测试2 > TEST3|测试3 > Writing 'TEST1|测试1 TEST2|测试2 TEST3|测试3 ' to temp.txt 54455354317cb2e2cad431a54455354327cb2e2cad432a54455354337cb2e2cad433ada .............b2e2cad4 -------------------------------8<---------------------------- import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintWriter; import java.io.UnsupportedEncodingException; public class GB2312Bytes { public static void main(String[] args) { String fileName = "temp.txt"; String text = "TEST1|测试1\n" + "TEST2|测试2\n" + "TEST3|测试3\n"; writeFile(fileName, text, "GB2312"); System.out.println(fileAsHex(fileName)); } private static void writeFile(String fileName, String text, String encoding) { System.out.println("Writing '" + text + "' to " + fileName); PrintWriter pw; try { pw = new PrintWriter(fileName, encoding); pw.println(text); pw.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } } private static String fileAsHex(String fileName) { StringBuilder sb = new StringBuilder(); FileInputStream in = null; try { in = new FileInputStream(fileName); int c; while ((c = in.read()) != -1) { sb.append(Integer.toHexString(c)); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (in != null) { try { in.close(); } catch (IOException e) { e.printStackTrace(); } } } return sb.toString(); } } -------------------------------8<---------------------------- -- RGB
From: RedGrittyBrick on 26 May 2010 16:39 On 26/05/2010 21:13, RedGrittyBrick wrote: Oops. if (c < 0x10) { sb.append("0"); } sb.append(Integer.toHexString(c)); -- RGB
From: moonhkt on 28 May 2010 03:45 On 5月27日, 上午4时39分, RedGrittyBrick <RedGrittyBr...(a)SpamWeary.invalid> wrote: > On 26/05/2010 21:13, RedGrittyBrick wrote: > > Oops. >          if (c < 0x10) { >            sb.append("0"); >          } >         sb.append(Integer.toHexString(c); > > -- > RGB Hi RGB Our AIX editor can not able to edit GB2312 code, I update the text string with byte value. It is OK ? java GB2312Bytes Change Terminal Emulation to Host character to GB2312, the output as below Writing 测试 to temp.txt 3f3f3f3f0a od -ct x1 temp.txt 0000000 ? ? ? ? \n 3f 3f 3f 3f 0a 0000005 import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintWriter; import java.io.UnsupportedEncodingException; public class GB2312Bytes { public static void main(String[] args) { String fileName = "temp.txt"; String text = new String( new byte [] { (byte) 0xb2, (byte) 0xe2 , (byte) 0xca , (byte) 0xd4 }); writeFile(fileName, text, "GB2312"); System.out.println(fileAsHex(fileName)); } private static void writeFile(String fileName, String text, String encoding) { System.out.println("Writing '" + text + "' to " + fileName); PrintWriter pw; try { pw = new PrintWriter(fileName, encoding); pw.println(text); pw.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } } private static String fileAsHex(String fileName) { StringBuilder sb = new StringBuilder(); FileInputStream in = null; try { in = new FileInputStream(fileName); int c; while ((c = in.read()) != -1) { if (c < 0x10) { sb.append("0"); } sb.append(Integer.toHexString(c)); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (in != null) { try { in.close(); } catch (IOException e) { e.printStackTrace(); } } } return sb.toString(); } }
From: RedGrittyBrick on 28 May 2010 05:58
On 28/05/2010 08:45, moonhkt wrote: > On 5月27日, 上午4时39分, RedGrittyBrick<RedGrittyBr...(a)SpamWeary.invalid> > wrote: >> On 26/05/2010 21:13, RedGrittyBrick wrote: >> >> Oops. >> if (c< 0x10) { >> sb.append("0"); >> } >> sb.append(Integer.toHexString(c); >> >> -- >> RGB > > Hi RGB > > Our AIX editor can not able to edit GB2312 code, I update the text > string with byte value. It is OK ? Since you already had a temp.txt file you could have just commented-out the writeFile() call. I didn't so I used Java to create one - you don't really need to do this if you are certain that your temp.txt contains the characters in GB2312 encoding. But see below ... > > java GB2312Bytes > > Change Terminal Emulation to Host charcter to GB2312., the output as > below > > Writing 测试 to temp.txt > 3f3f3f3f0a > > > od -ct x1 temp.txt > 0000000 ? ? ? ? \n > 3f 3f 3f 3f 0a > 0000005 > > > import java.io.FileInputStream; > import java.io.FileNotFoundException; > import java.io.IOException; > import java.io.PrintWriter; > import java.io.UnsupportedEncodingException; > > public class GB2312Bytes { > public static void main(String[] args) { > String fileName = "temp.txt"; > String text = new String( new byte [] { > (byte) 0xb2, (byte) 0xe2 , (byte) 0xca , (byte) 0xd4 Firstly, you should use Unicode escapes to insert unicode characters. Secondly you should use Unicode code-points not GB2312 code points, this is because Java Strings are Unicode strings (in UTF-16 encoding) See <http://www.herongyang.com/gb2312/ug_map_24.html> 8BD5 CAD4 试 and <http://www.herongyang.com/gb2312/ug_map_15.html> 6D4B B2E2 测 So use String text = "\u6d4b\u8bd5"; When you later write this Unicode String data to a file using GB2312 encoding, Java will translate the Unicode code-point to the GB2312 code point. Also remember that Unicode is much bigger than GB2312, Java can only perform this conversion if the Unicode code points are for characters that are within the GB2312 character set. 
The Unicode code points b2e2 and cad4 (that you specified) are actually Korean Hangul characters that are not in GB2312 and so are translated to "?". > }); > writeFile(fileName, text, "GB2312"); > System.out.println(fileAsHex(fileName)); > } > > private static void writeFile(String fileName, String text, > String encoding) { > System.out.println("Writing '" + text + "' to " + fileName); > PrintWriter pw; > try { > pw = new PrintWriter(fileName, encoding); > pw.println(text); > pw.close(); > } catch (FileNotFoundException e) { > e.printStackTrace(); > } catch (UnsupportedEncodingException e) { > e.printStackTrace(); > } > } > > private static String fileAsHex(String fileName) { > StringBuilder sb = new StringBuilder(); > > FileInputStream in = null; > try { > in = new FileInputStream(fileName); > int c; > while ((c = in.read()) != -1) { > if (c< 0x10) { > sb.append("0"); > } > sb.append(Integer.toHexString(c)); > } > } catch (FileNotFoundException e) { > e.printStackTrace(); > } catch (IOException e) { > e.printStackTrace(); > } finally { > if (in != null) { > try { > in.close(); > } catch (IOException e) { > e.printStackTrace(); > } > } > } > > return sb.toString(); > } > } -- RGB |