summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles Oliver Nutter <headius@headius.com>2023-01-12 14:42:48 -0600
committerCharles Oliver Nutter <headius@headius.com>2023-01-12 14:45:59 -0600
commit209e4655e60164e6d0e86e892acdc52d76b5a6b4 (patch)
treec63bd704168a8525cab3cb70e1000fa22ff49e30
parent5c377ef565db2c4641068e5f04742831378e48c7 (diff)
downloadpsych-209e4655e60164e6d0e86e892acdc52d76b5a6b4.tar.gz
Clean up reader creation
* Skip "read" respondTo check for natural RubyIO * Remove duplicate malformed input reporting set * Flip some logic to simplify
-rw-r--r--ext/java/org/jruby/ext/psych/PsychParser.java64
1 files changed, 35 insertions, 29 deletions
diff --git a/ext/java/org/jruby/ext/psych/PsychParser.java b/ext/java/org/jruby/ext/psych/PsychParser.java
index 0f7b79e..1740ef3 100644
--- a/ext/java/org/jruby/ext/psych/PsychParser.java
+++ b/ext/java/org/jruby/ext/psych/PsychParser.java
@@ -129,45 +129,30 @@ public class PsychParser extends RubyObject {
private StreamReader readerFor(ThreadContext context, IRubyObject yaml) {
if (yaml instanceof RubyString) {
- ByteList byteList = ((RubyString)yaml).getByteList();
- Encoding enc = byteList.getEncoding();
-
- // if not unicode, transcode to UTF8
- if (!(enc instanceof UnicodeEncoding)) {
- byteList = EncodingUtils.strConvEnc(context, byteList, enc, UTF8Encoding.INSTANCE);
- enc = UTF8Encoding.INSTANCE;
- }
-
- ByteArrayInputStream bais = new ByteArrayInputStream(byteList.getUnsafeBytes(), byteList.getBegin(), byteList.getRealSize());
-
- Charset charset = enc.getCharset();
-
- assert charset != null : "charset for encoding " + enc + " should not be null";
-
- InputStreamReader isr = new InputStreamReader(bais, charset);
-
- return new StreamReader(isr);
+ return readerForString(context, (RubyString) yaml);
}
// fall back on IOInputStream, using default charset
- if (yaml.respondsTo("read")) {
- Charset charset = null;
- if (yaml instanceof RubyIO) {
+ return readerForIO(context, yaml);
+ }
+
+ private static StreamReader readerForIO(ThreadContext context, IRubyObject yaml) {
+ boolean isIO = yaml instanceof RubyIO;
+ if (isIO || yaml.respondsTo("read")) {
+ // default to UTF8 unless RubyIO has UTF16 as encoding
+ Charset charset = RubyEncoding.UTF8;
+
+ if (isIO) {
Encoding enc = ((RubyIO) yaml).getReadEncoding();
- charset = enc.getCharset();
// libyaml treats non-utf encodings as utf-8 and hopes for the best.
- if (!(enc instanceof UTF8Encoding) && !(enc instanceof UTF16LEEncoding) && !(enc instanceof UTF16BEEncoding)) {
- charset = UTF8Encoding.INSTANCE.getCharset();
+ if (enc instanceof UTF16LEEncoding || enc instanceof UTF16BEEncoding) {
+ charset = enc.getCharset();
}
}
- if (charset == null) {
- // If we can't get it from the IO or it doesn't have a charset, fall back on UTF-8
- charset = UTF8Encoding.INSTANCE.getCharset();
- }
+
CharsetDecoder decoder = charset.newDecoder();
decoder.onMalformedInput(CodingErrorAction.REPORT);
- decoder.onMalformedInput(CodingErrorAction.REPORT);
return new StreamReader(new InputStreamReader(new IOInputStream(yaml), decoder));
} else {
@@ -177,6 +162,27 @@ public class PsychParser extends RubyObject {
}
}
+ private static StreamReader readerForString(ThreadContext context, RubyString string) {
+ ByteList byteList = string.getByteList();
+ Encoding enc = byteList.getEncoding();
+
+ // if not unicode, transcode to UTF8
+ if (!(enc instanceof UnicodeEncoding)) {
+ byteList = EncodingUtils.strConvEnc(context, byteList, enc, UTF8Encoding.INSTANCE);
+ enc = UTF8Encoding.INSTANCE;
+ }
+
+ ByteArrayInputStream bais = new ByteArrayInputStream(byteList.getUnsafeBytes(), byteList.getBegin(), byteList.getRealSize());
+
+ Charset charset = enc.getCharset();
+
+ assert charset != null : "charset for encoding " + enc + " should not be null";
+
+ InputStreamReader isr = new InputStreamReader(bais, charset);
+
+ return new StreamReader(isr);
+ }
+
@JRubyMethod(name = "_native_parse")
public IRubyObject parse(ThreadContext context, IRubyObject handler, IRubyObject yaml, IRubyObject path) {
Ruby runtime = context.runtime;