summary | refs | log | tree | commit | diff
path: root/lib/jinterface
diff options
context:
space:
mode:
author: Rickard Green <rickard@erlang.org> 2013-01-05 03:07:14 +0100
committer: Rickard Green <rickard@erlang.org> 2013-01-16 17:14:22 +0100
commit: 5d79f55ca441727578d34b78ee0d6d8aa80976ee (patch)
tree: d5e1df74d1742dbc7fad8f04ecdde56bb52a1185 /lib/jinterface
parent: 685d009efcfd7521e9c918a14b58eac19755299d (diff)
download: erlang-5d79f55ca441727578d34b78ee0d6d8aa80976ee.tar.gz
Implement UTF-8 atom support for jinterface
Diffstat (limited to 'lib/jinterface')
-rw-r--r-- lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java | 4
-rw-r--r-- lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java | 2
-rw-r--r-- lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java | 6
-rw-r--r-- lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java | 64
-rw-r--r-- lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java | 60
-rw-r--r-- lib/jinterface/test/nc_SUITE.erl | 146
6 files changed, 226 insertions, 56 deletions
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java
index 16cb544a16..c76fad5e45 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/AbstractNode.java
@@ -90,6 +90,8 @@ public class AbstractNode {
static final int dFlagExportPtrTag = 0x200; // NOT SUPPORTED
static final int dFlagBitBinaries = 0x400;
static final int dFlagNewFloats = 0x800;
+ static final int dFlagUnicodeIo = 0x1000;
+ static final int dFlagUtf8Atoms = 0x10000;
int ntype = NTYPE_R6;
int proto = 0; // tcp/ip
@@ -98,7 +100,7 @@ public class AbstractNode {
int creation = 0;
int flags = dFlagExtendedReferences | dFlagExtendedPidsPorts
| dFlagBitBinaries | dFlagNewFloats | dFlagFunTags
- | dflagNewFunTags;
+ | dflagNewFunTags | dFlagUtf8Atoms;
/* initialize hostname and default cookie */
static {
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java
index ced4dbb8c2..2768edc6fa 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangAtom.java
@@ -51,7 +51,7 @@ public class OtpErlangAtom extends OtpErlangObject implements Serializable,
"null string value");
}
- if (atom.length() > maxAtomLength) {
+ if (atom.codePointCount(0, atom.length()) > maxAtomLength) {
throw new java.lang.IllegalArgumentException("Atom may not exceed "
+ maxAtomLength + " characters: " + atom);
}
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java
index e70b9a786b..2a4cd4fa2d 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpExternal.java
@@ -88,6 +88,12 @@ public class OtpExternal {
/** The tag used for old Funs */
public static final int funTag = 117;
+ /** The tag used for unicode atoms */
+ public static final int atomUtf8Tag = 118;
+
+ /** The tag used for small unicode atoms */
+ public static final int smallAtomUtf8Tag = 119;
+
/** The tag used for compressed terms */
public static final int compressedTag = 80;
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java
index ae5f4ee072..c2a79af841 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpInputStream.java
@@ -351,26 +351,64 @@ public class OtpInputStream extends ByteArrayInputStream {
*/
public String read_atom() throws OtpErlangDecodeException {
int tag;
- int len;
+ int len = -1;
byte[] strbuf;
String atom;
tag = read1skip_version();
- if (tag != OtpExternal.atomTag) {
- throw new OtpErlangDecodeException(
- "wrong tag encountered, expected " + OtpExternal.atomTag
- + ", got " + tag);
- }
+ switch (tag) {
- len = read2BE();
+ case OtpExternal.atomTag:
+ len = read2BE();
+ strbuf = new byte[len];
+ this.readN(strbuf);
+ try {
+ atom = new String(strbuf, "ISO-8859-1");
+ } catch (final java.io.UnsupportedEncodingException e) {
+ throw new OtpErlangDecodeException(
+ "Failed to decode ISO-8859-1 atom");
+ }
+ if (atom.length() > OtpExternal.maxAtomLength) {
+ /*
+ * Throwing an exception would be better I think,
+ * but truncation seems to be the way it has
+ * been done in other parts of OTP...
+ */
+ atom = atom.substring(0, OtpExternal.maxAtomLength);
+ }
+ break;
- strbuf = new byte[len];
- this.readN(strbuf);
- atom = OtpErlangString.newString(strbuf);
+ case OtpExternal.smallAtomUtf8Tag:
+ len = read1();
+ /* fall through */
+ case OtpExternal.atomUtf8Tag:
+ if (len < 0) {
+ len = read2BE();
+ }
+ strbuf = new byte[len];
+ this.readN(strbuf);
+ try {
+ atom = new String(strbuf, "UTF-8");
+ } catch (final java.io.UnsupportedEncodingException e) {
+ throw new OtpErlangDecodeException(
+ "Failed to decode UTF-8 atom");
+ }
+ if (atom.codePointCount(0, atom.length()) > OtpExternal.maxAtomLength) {
+ /*
+ * Throwing an exception would be better I think,
+ * but truncation seems to be the way it has
+ * been done in other parts of OTP...
+ */
+ final int[] cps = OtpErlangString.stringToCodePoints(atom);
+ atom = new String(cps, 0, OtpExternal.maxAtomLength);
+ }
+ break;
- if (atom.length() > OtpExternal.maxAtomLength) {
- atom = atom.substring(0, OtpExternal.maxAtomLength);
+ default:
+ throw new OtpErlangDecodeException(
+ "wrong tag encountered, expected " + OtpExternal.atomTag
+ + ", or " + OtpExternal.atomUtf8Tag + ", got " + tag);
}
return atom;
@@ -1152,6 +1190,8 @@ public class OtpInputStream extends ByteArrayInputStream {
return new OtpErlangLong(this);
case OtpExternal.atomTag:
+ case OtpExternal.smallAtomUtf8Tag:
+ case OtpExternal.atomUtf8Tag:
return new OtpErlangAtom(this);
case OtpExternal.floatTag:
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
index 22ebb4688a..10bdf389cd 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpOutputStream.java
@@ -343,9 +343,63 @@ public class OtpOutputStream extends ByteArrayOutputStream {
* the string to write.
*/
public void write_atom(final String atom) {
- write1(OtpExternal.atomTag);
- write2BE(atom.length());
- writeN(atom.getBytes());
+ String enc_atom;
+ byte[] bytes;
+ boolean isLatin1 = true;
+
+ if (atom.codePointCount(0, atom.length()) <= OtpExternal.maxAtomLength) {
+ enc_atom = atom;
+ }
+ else {
+ /*
+ * Throwing an exception would be better I think,
+ * but truncation seems to be the way it has
+ * been done in other parts of OTP...
+ */
+ enc_atom = new String(OtpErlangString.stringToCodePoints(atom),
+ 0, OtpExternal.maxAtomLength);
+ }
+
+ for (int offset = 0; offset < enc_atom.length();) {
+ final int cp = enc_atom.codePointAt(offset);
+ if ((cp & ~0xFF) != 0) {
+ isLatin1 = false;
+ break;
+ }
+ offset += Character.charCount(cp);
+ }
+ try {
+ if (isLatin1) {
+ bytes = enc_atom.getBytes("ISO-8859-1");
+ write1(OtpExternal.atomTag);
+ write2BE(bytes.length);
+ }
+ else {
+ bytes = enc_atom.getBytes("UTF-8");
+ final int length = bytes.length;
+ if (length < 256) {
+ write1(OtpExternal.smallAtomUtf8Tag);
+ write1(length);
+ }
+ else {
+ write1(OtpExternal.atomUtf8Tag);
+ write2BE(length);
+ }
+ }
+ writeN(bytes);
+ } catch (final java.io.UnsupportedEncodingException e) {
+ /*
+ * Sigh, why didn't the API designer add an
+ * OtpErlangEncodeException to these encoding
+ * functions?!? Instead of changing the API we
+ * write an invalid atom and let it fail for
+ * whoever trying to decode this... Sigh,
+ * again...
+ */
+ write1(OtpExternal.smallAtomUtf8Tag);
+ write1(2);
+ write2BE(0xffff); /* Invalid UTF-8 */
+ }
}
/**
diff --git a/lib/jinterface/test/nc_SUITE.erl b/lib/jinterface/test/nc_SUITE.erl
index d5388e54f4..ccc87a6761 100644
--- a/lib/jinterface/test/nc_SUITE.erl
+++ b/lib/jinterface/test/nc_SUITE.erl
@@ -22,6 +22,15 @@
-include_lib("common_test/include/ct.hrl").
-include("test_server_line.hrl").
+-define(VERSION_MAGIC, 131).
+
+-define(ATOM_EXT, 100).
+-define(REFERENCE_EXT, 101).
+-define(PORT_EXT, 102).
+-define(PID_EXT, 103).
+-define(NEW_REFERENCE_EXT, 114).
+-define(ATOM_UTF8_EXT, 118).
+-define(SMALL_ATOM_UTF8_EXT, 119).
-export([all/0, suite/0,groups/0,init_per_group/2,end_per_group/2,
init_per_suite/1,
@@ -45,6 +54,10 @@
unicode/1,
unicode_list_to_string/1,
unicode_string_to_list/1,
+ utf8_atom/1,
+ utf8_pid/1,
+ utf8_port/1,
+ utf8_ref/1,
connect/1]).
@@ -58,7 +71,9 @@ all() ->
decompress_roundtrip, compress_roundtrip,
integer_roundtrip, fun_roundtrip, lists_roundtrip,
lists_roundtrip_2, lists_iterator, unicode,
- unicode_list_to_string, unicode_string_to_list, connect].
+ unicode_list_to_string, unicode_string_to_list,
+ utf8_atom, utf8_pid, utf8_port, utf8_ref,
+ connect].
groups() ->
[].
@@ -448,6 +463,71 @@ unicode_string_to_list(Config) when is_list(Config) ->
end, ["unicode"]).
+evil_smiley() ->
+ <<240,159,152,136>>.
+
+evil_smileys(0) ->
+ [];
+evil_smileys(N) ->
+ [evil_smiley() | evil_smileys(N-1)].
+
+utf8_atom(Config) when is_list(Config) ->
+ ES = evil_smiley(),
+ SmallUA = binary_to_term(list_to_binary([?VERSION_MAGIC,
+ ?SMALL_ATOM_UTF8_EXT,
+ size(ES),
+ ES])),
+ true = is_atom(SmallUA),
+ NoESs = 300 div size(ES),
+ ESs = evil_smileys(NoESs),
+ LargeUA = binary_to_term(list_to_binary([?VERSION_MAGIC,
+ ?ATOM_UTF8_EXT,
+ uint16_be(NoESs*size(ES)),
+ ESs])),
+ true = is_atom(LargeUA),
+ erlang:display({atom, SmallUA, LargeUA}),
+ do_echo([SmallUA, LargeUA], Config).
+
+utf8_nodenames_ext() ->
+ H = "@host",
+ ES = evil_smiley(),
+ SmallUANodeExt = list_to_binary([?SMALL_ATOM_UTF8_EXT,
+ size(ES)+length(H),
+ ES,
+ H]),
+ NoESs = 300 div size(ES),
+ ESs = evil_smileys(NoESs),
+ LargeUANodeExt = list_to_binary([?ATOM_UTF8_EXT,
+ uint16_be(NoESs*size(ES)+length(H)),
+ ESs,
+ H]),
+ {SmallUANodeExt, LargeUANodeExt}.
+
+utf8_pid(Config) when is_list(Config) ->
+ {SmallUANodeExt, LargeUANodeExt} = utf8_nodenames_ext(),
+ SmallPid = mk_pid({SmallUANodeExt, 2}, 4711, 4711),
+ LargePid = mk_pid({LargeUANodeExt, 2}, 4711, 4711),
+ erlang:display({pid, SmallPid, node(SmallPid)}),
+ erlang:display({pid, LargePid, node(LargePid)}),
+ do_echo([SmallPid, LargePid], Config).
+
+utf8_port(Config) when is_list(Config) ->
+ {SmallUANodeExt, LargeUANodeExt} = utf8_nodenames_ext(),
+ SmallPort = mk_port({SmallUANodeExt, 2}, 4711),
+ erlang:display({port, SmallPort, node(SmallPort)}),
+ LargePort = mk_port({LargeUANodeExt, 2}, 4711),
+ erlang:display({port, LargePort, node(LargePort)}),
+ do_echo([SmallPort, LargePort], Config).
+
+utf8_ref(Config) when is_list(Config) ->
+ {SmallUANodeExt, LargeUANodeExt} = utf8_nodenames_ext(),
+ SmallRef = mk_ref({SmallUANodeExt, 2}, [4711, 4711, 4711]),
+ erlang:display({ref, SmallRef, node(SmallRef)}),
+ LargeRef = mk_ref({LargeUANodeExt, 2}, [4711, 4711, 4711]),
+ erlang:display({ref, LargeRef, node(LargeRef)}),
+ do_echo([SmallRef, LargeRef], Config).
+
+
%% Lazy list
cp_gen(N) ->
cp_gen(N, -1, 16#110000).
@@ -646,16 +726,6 @@ make_name() ->
++ "-" ++ integer_to_list(B)
++ "-" ++ integer_to_list(C)).
-
-
--define(VERSION_MAGIC, 131).
-
--define(ATOM_EXT, 100).
--define(REFERENCE_EXT, 101).
--define(PORT_EXT, 102).
--define(PID_EXT, 103).
--define(NEW_REFERENCE_EXT, 114).
-
uint32_be(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 32 ->
[(Uint bsr 24) band 16#ff,
(Uint bsr 16) band 16#ff,
@@ -679,72 +749,70 @@ uint8(Uint) ->
mk_pid({NodeName, Creation}, Number, Serial) when is_atom(NodeName) ->
- mk_pid({atom_to_list(NodeName), Creation}, Number, Serial);
-mk_pid({NodeName, Creation}, Number, Serial) ->
+ <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName),
+ mk_pid({NodeNameExt, Creation}, Number, Serial);
+mk_pid({NodeNameExt, Creation}, Number, Serial) ->
case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
?PID_EXT,
- ?ATOM_EXT,
- uint16_be(length(NodeName)),
- NodeName,
+ NodeNameExt,
uint32_be(Number),
uint32_be(Serial),
uint8(Creation)])) of
Pid when is_pid(Pid) ->
Pid;
{'EXIT', {badarg, _}} ->
- exit({badarg, mk_pid, [{NodeName, Creation}, Number, Serial]});
+ exit({badarg, mk_pid, [{NodeNameExt, Creation}, Number, Serial]});
Other ->
exit({unexpected_binary_to_term_result, Other})
end.
mk_port({NodeName, Creation}, Number) when is_atom(NodeName) ->
- mk_port({atom_to_list(NodeName), Creation}, Number);
-mk_port({NodeName, Creation}, Number) ->
+ <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName),
+ mk_port({NodeNameExt, Creation}, Number);
+mk_port({NodeNameExt, Creation}, Number) ->
case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
?PORT_EXT,
- ?ATOM_EXT,
- uint16_be(length(NodeName)),
- NodeName,
+ NodeNameExt,
uint32_be(Number),
uint8(Creation)])) of
Port when is_port(Port) ->
Port;
{'EXIT', {badarg, _}} ->
- exit({badarg, mk_port, [{NodeName, Creation}, Number]});
+ exit({badarg, mk_port, [{NodeNameExt, Creation}, Number]});
Other ->
exit({unexpected_binary_to_term_result, Other})
end.
-mk_ref({NodeName, Creation}, Numbers) when is_atom(NodeName),
- is_integer(Creation),
- is_list(Numbers) ->
- mk_ref({atom_to_list(NodeName), Creation}, Numbers);
-mk_ref({NodeName, Creation}, [Number]) when is_list(NodeName),
- is_integer(Creation),
- is_integer(Number) ->
+mk_ref({NodeName, Creation}, [Number] = NL) when is_atom(NodeName),
+ is_integer(Creation),
+ is_integer(Number) ->
+ <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName),
+ mk_ref({NodeNameExt, Creation}, NL);
+mk_ref({NodeNameExt, Creation}, [Number]) when is_integer(Creation),
+ is_integer(Number) ->
case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
?REFERENCE_EXT,
- ?ATOM_EXT,
- uint16_be(length(NodeName)),
- NodeName,
+ NodeNameExt,
uint32_be(Number),
uint8(Creation)])) of
Ref when is_reference(Ref) ->
Ref;
{'EXIT', {badarg, _}} ->
- exit({badarg, mk_ref, [{NodeName, Creation}, [Number]]});
+ exit({badarg, mk_ref, [{NodeNameExt, Creation}, [Number]]});
Other ->
exit({unexpected_binary_to_term_result, Other})
end;
-mk_ref({NodeName, Creation}, Numbers) when is_list(NodeName),
+mk_ref({NodeName, Creation}, Numbers) when is_atom(NodeName),
is_integer(Creation),
is_list(Numbers) ->
+ <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName),
+ mk_ref({NodeNameExt, Creation}, Numbers);
+mk_ref({NodeNameExt, Creation}, Numbers) when is_integer(Creation),
+ is_list(Numbers) ->
case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
?NEW_REFERENCE_EXT,
uint16_be(length(Numbers)),
- ?ATOM_EXT,
- uint16_be(length(NodeName)),
- NodeName,
+ NodeNameExt,
uint8(Creation),
lists:map(fun (N) ->
uint32_be(N)
@@ -753,7 +821,7 @@ mk_ref({NodeName, Creation}, Numbers) when is_list(NodeName),
Ref when is_reference(Ref) ->
Ref;
{'EXIT', {badarg, _}} ->
- exit({badarg, mk_ref, [{NodeName, Creation}, Numbers]});
+ exit({badarg, mk_ref, [{NodeNameExt, Creation}, Numbers]});
Other ->
exit({unexpected_binary_to_term_result, Other})
end.