Sunday, February 12, 2012

Can't get unicode with ResultSet.getString()

Hi;
I am using MS Sql Server and it has an nvarchar field holding a name.
For ascii chars I get back the text in the database.
But if it is anything other than ascii, I get back a ? for each non
7-bit character. So a field with "a\u9f23b\u4011c" returns "a?b?c"
What do I need to set/do to get back the unicode values in the
database?
thanks - dave
david@.at-at-at@.windward.dot.dot.net
Windward Reports -- http://www.WindwardReports.com
Page 2 Stage -- http://www.Page2Stage.com
Enemy Nations -- http://www.EnemyNations.com
me -- http://dave.thielen.com
Barbie Science Fair -- http://www.BarbieScienceFair.info
(yes I have lots of links)
Here is some code that illustrates the problem - all return a?b?c
package net.windward.store.util.test;
import java.sql.DriverManager;
import java.sql.Connection;
import java.sql.Statement;
import java.sql.ResultSet;
import java.util.Properties;
import java.io.InputStream;
import java.io.Reader;
public class TestJdbcUnicode {
private static String className =
"com.microsoft.jdbc.sqlserver.SQLServerDriver" ;
private static String url =
"jdbc:microsoft:sqlserver://localhost:1433;DatabaseName=StoreTest";
private static String username = "sa";
private static String password = "mmouse";
public static void main(String[] args) throws Exception {
String textInDb = "a?b?c"; // was the actual text (not
\u)
System.out.println("text = " + textInDb);
displayString(textInDb);
textInDb = "a\u98a8b\u0436c";
System.out.println("text = " + textInDb);
displayString(textInDb); // only correct display
System.out.println("Standard open");
Class.forName(className).newInstance();
Connection conn = DriverManager.getConnection(url,
username, password);
Statement stmt =
conn.createStatement(ResultSet.TYPE_SCROLL_INSENSI TIVE,
ResultSet.CONCUR_UPDATABLE);
ResultSet rs = stmt.executeQuery("select * from Person
where PersonId = 25325");
displayResult(rs);
byte [] data = new byte[10];
InputStream is = rs.getAsciiStream("name");
int len = is.read(data);
//obj = rs.getBytes("name");
Reader rdr = rs.getCharacterStream("name");
char [] cbuf = new char[10];
len = rdr.read(cbuf);
System.out.println("Properties open");
Class.forName(className).newInstance();
Properties info = new Properties();
info.put("user", username);
info.put("password", password);
conn = DriverManager.getConnection(url, info);
stmt =
conn.createStatement(ResultSet.TYPE_SCROLL_INSENSI TIVE,
ResultSet.CONCUR_UPDATABLE);
rs = stmt.executeQuery("select * from Person where
PersonId = 25325");
displayResult(rs);
System.out.println("Properties open UTF-8");
Class.forName(className).newInstance();
info = new Properties();
info.put("user", username);
info.put("password", password);
info.put("charSet", "UTF-8");
conn = DriverManager.getConnection(url, info);
stmt =
conn.createStatement(ResultSet.TYPE_SCROLL_INSENSI TIVE,
ResultSet.CONCUR_UPDATABLE);
rs = stmt.executeQuery("select * from Person where
PersonId = 25325");
displayResult(rs);
System.out.println("Properties open UTF-16");
Class.forName(className).newInstance();
info = new Properties();
info.put("user", username);
info.put("password", password);
info.put("charSet", "UTF-16");
conn = DriverManager.getConnection(url, info);
stmt =
conn.createStatement(ResultSet.TYPE_SCROLL_INSENSI TIVE,
ResultSet.CONCUR_UPDATABLE);
rs = stmt.executeQuery("select * from Person where
PersonId = 25325");
displayResult(rs);
System.out.println("Properties open unicode");
Class.forName(className).newInstance();
info = new Properties();
info.put("user", username);
info.put("password", password);
info.put("charSet", "unicode");
conn = DriverManager.getConnection(url, info);
stmt =
conn.createStatement(ResultSet.TYPE_SCROLL_INSENSI TIVE,
ResultSet.CONCUR_UPDATABLE);
rs = stmt.executeQuery("select * from Person where
PersonId = 25325");
displayResult(rs);
}
private static void displayResult(ResultSet rs) throws
Exception {
if (! rs.next()) {
System.err.println("No results");
return;
}
String rtn = rs.getString("name");
displayString(rtn);
}
private static void displayString(String rtn) {
System.out.println("rtn = " + rtn);
System.out.print("rtn[] = ");
for (int ind=0; ind<rtn.length(); ind++)
System.out.print("x" +
Integer.toHexString((int)rtn.charAt(ind)) + " ");
}
}
david@.at-at-at@.windward.dot.dot.net
Windward Reports -- http://www.WindwardReports.com
Page 2 Stage -- http://www.Page2Stage.com
Enemy Nations -- http://www.EnemyNations.com
me -- http://dave.thielen.com
Barbie Science Fair -- http://www.BarbieScienceFair.info
(yes I have lots of links)
|||Yes this is a bug (I am assuming you are using the SQL Server 2005 JDBC
driver Beta2, perhaps not). We cut the getAsciiStream function we had a few
serious bugs in it that were not easy to resolve in a clear cut manner. The
JDBC spec is not super clear on how it is supposed to work.
For example, should getAsciiStream take the incoming TDS character data and
convert this to US-ASCII? What if the incoming data is in Japanese
collation and this is a lossy conversion, etc... there are lots of
situations where this could be lossy. Also, does it just mean send back the
raw bytes? Then why is it called Ascii Stream, etc... It makes my head
hurt bad when I am writing support for SQL Server's 1000+ TDS language
collations and I don't want to be lossy and corrupt customer data.
I think you can use getBytes to work around this for now, something like
this I believe ->
ByteArrayInputStream bas = new ByteArrayInputStream(rs.getBytes(2));
This might give you bytes in UNICODE that would necessitate you converting
to single byte stream, but this depends upon your back end collation. If
the collation is a simple 2:1 UNICODE -> Single Byte mapping then it is easy
to strip out every other byte.
You can do things like this as well:
InputStreamReader isr = new InputStreamReader(new
ByteArrayInputStream(rs.getBytes(2)), "US-ASCII");
Twiggle around with the "US-ASCII", maybe you want to convert using some
other encoding like UTF-8, etc...
Let me know which driver you are using and perhaps I can come up with a
better solution.
Also, I would like to hear your reasoning behind using getAsciiStream, why
do you find you need to use this API, let me know about this, this would be
good feedback for our team working on the SQL JDB 2005 driver.
Matt Neerincx [MSFT]
This posting is provided "AS IS", with no warranties, and confers no rights.
Please do not send email directly to this alias. This alias is for newsgroup
purposes only.
"David Thielen" <david@.windward.net> wrote in message
news:c1srh1te9er4nh0h089oagua0frhgupuhm@.4ax.com...
> Here is some code that illustrates the problem - all return a?b?c
> package net.windward.store.util.test;
> import java.sql.DriverManager;
> import java.sql.Connection;
> import java.sql.Statement;
> import java.sql.ResultSet;
> import java.util.Properties;
> import java.io.InputStream;
> import java.io.Reader;
> public class TestJdbcUnicode {
> private static String className =
> "com.microsoft.jdbc.sqlserver.SQLServerDriver" ;
> private static String url =
> "jdbc:microsoft:sqlserver://localhost:1433;DatabaseName=StoreTest";
> private static String username = "sa";
> private static String password = "mmouse";
> public static void main(String[] args) throws Exception {
> String textInDb = "a?b?c"; // was the actual text (not
> \u)
> System.out.println("text = " + textInDb);
> displayString(textInDb);
> textInDb = "a\u98a8b\u0436c";
> System.out.println("text = " + textInDb);
> displayString(textInDb); // only correct display
> System.out.println("Standard open");
> Class.forName(className).newInstance();
> Connection conn = DriverManager.getConnection(url,
> username, password);
> Statement stmt =
> conn.createStatement(ResultSet.TYPE_SCROLL_INSENSI TIVE,
> ResultSet.CONCUR_UPDATABLE);
> ResultSet rs = stmt.executeQuery("select * from Person
> where PersonId = 25325");
> displayResult(rs);
> byte [] data = new byte[10];
> InputStream is = rs.getAsciiStream("name");
> int len = is.read(data);
> // obj = rs.getBytes("name");
> Reader rdr = rs.getCharacterStream("name");
> char [] cbuf = new char[10];
> len = rdr.read(cbuf);
>
> System.out.println("Properties open");
> Class.forName(className).newInstance();
> Properties info = new Properties();
> info.put("user", username);
> info.put("password", password);
> conn = DriverManager.getConnection(url, info);
> stmt =
> conn.createStatement(ResultSet.TYPE_SCROLL_INSENSI TIVE,
> ResultSet.CONCUR_UPDATABLE);
> rs = stmt.executeQuery("select * from Person where
> PersonId = 25325");
> displayResult(rs);
>
> System.out.println("Properties open UTF-8");
> Class.forName(className).newInstance();
> info = new Properties();
> info.put("user", username);
> info.put("password", password);
> info.put("charSet", "UTF-8");
> conn = DriverManager.getConnection(url, info);
> stmt =
> conn.createStatement(ResultSet.TYPE_SCROLL_INSENSI TIVE,
> ResultSet.CONCUR_UPDATABLE);
> rs = stmt.executeQuery("select * from Person where
> PersonId = 25325");
> displayResult(rs);
>
> System.out.println("Properties open UTF-16");
> Class.forName(className).newInstance();
> info = new Properties();
> info.put("user", username);
> info.put("password", password);
> info.put("charSet", "UTF-16");
> conn = DriverManager.getConnection(url, info);
> stmt =
> conn.createStatement(ResultSet.TYPE_SCROLL_INSENSI TIVE,
> ResultSet.CONCUR_UPDATABLE);
> rs = stmt.executeQuery("select * from Person where
> PersonId = 25325");
> displayResult(rs);
>
> System.out.println("Properties open unicode");
> Class.forName(className).newInstance();
> info = new Properties();
> info.put("user", username);
> info.put("password", password);
> info.put("charSet", "unicode");
> conn = DriverManager.getConnection(url, info);
> stmt =
> conn.createStatement(ResultSet.TYPE_SCROLL_INSENSI TIVE,
> ResultSet.CONCUR_UPDATABLE);
> rs = stmt.executeQuery("select * from Person where
> PersonId = 25325");
> displayResult(rs);
> }
> private static void displayResult(ResultSet rs) throws
> Exception {
> if (! rs.next()) {
> System.err.println("No results");
> return;
> }
> String rtn = rs.getString("name");
> displayString(rtn);
> }
> private static void displayString(String rtn) {
> System.out.println("rtn = " + rtn);
> System.out.print("rtn[] = ");
> for (int ind=0; ind<rtn.length(); ind++)
> System.out.print("x" +
> Integer.toHexString((int)rtn.charAt(ind)) + " ");
> }
> }
>
> david@.at-at-at@.windward.dot.dot.net
> Windward Reports -- http://www.WindwardReports.com
> Page 2 Stage -- http://www.Page2Stage.com
> Enemy Nations -- http://www.EnemyNations.com
> me -- http://dave.thielen.com
> Barbie Science Fair -- http://www.BarbieScienceFair.info
> (yes I have lots of links)

No comments:

Post a Comment