From e93e44a627d781960847493a49405e014deb29b8 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Fri, 13 Jan 2023 02:55:52 -0600 Subject: Initial move to SnakeYAML Engine See jruby/jruby#7570 for some of the justification for this move. We only require the parser from SnakeYAML, but in the original form it is encumbered with Java object serialization code that keeps getting flagged as a CVE risk. We disagree with the assessment, at least as it pertains to JRuby (we do not use the code in question) but our inclusion of the library continues to get flagged by auditing tools. This commit starts the process of moving to the successor library, SnakeYAML Engine. The parser API is largely unchanged, except as seen in this commit. No Java exceptions are thrown, but a number of Psych tests fail (possibly due to Engine being YAML 1.2 only). --- ext/java/org/jruby/ext/psych/PsychEmitter.java | 201 +++++++++++----------- ext/java/org/jruby/ext/psych/PsychLibrary.java | 32 +--- ext/java/org/jruby/ext/psych/PsychParser.java | 222 ++++++++++++++----------- ext/java/org/jruby/ext/psych/PsychToRuby.java | 5 +- 4 files changed, 237 insertions(+), 223 deletions(-) (limited to 'ext') diff --git a/ext/java/org/jruby/ext/psych/PsychEmitter.java b/ext/java/org/jruby/ext/psych/PsychEmitter.java index f2e6498..5e680d1 100644 --- a/ext/java/org/jruby/ext/psych/PsychEmitter.java +++ b/ext/java/org/jruby/ext/psych/PsychEmitter.java @@ -27,48 +27,52 @@ ***** END LICENSE BLOCK *****/ package org.jruby.ext.psych; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.nio.charset.Charset; -import java.util.HashMap; -import java.util.Map; - import org.jcodings.Encoding; import org.jcodings.specific.UTF8Encoding; import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyBoolean; import org.jruby.RubyClass; -import org.jruby.RubyFixnum; import org.jruby.RubyModule; import org.jruby.RubyObject; import org.jruby.RubyString; import 
org.jruby.anno.JRubyMethod; -import org.jruby.runtime.ObjectAllocator; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.IOOutputStream; import org.jruby.util.TypeConverter; import org.jruby.util.io.EncodingUtils; -import org.yaml.snakeyaml.DumperOptions; -import org.yaml.snakeyaml.emitter.Emitter; -import org.yaml.snakeyaml.emitter.EmitterException; -import org.yaml.snakeyaml.error.Mark; -import org.yaml.snakeyaml.events.AliasEvent; -import org.yaml.snakeyaml.events.DocumentEndEvent; -import org.yaml.snakeyaml.events.DocumentStartEvent; -import org.yaml.snakeyaml.events.Event; -import org.yaml.snakeyaml.events.ImplicitTuple; -import org.yaml.snakeyaml.events.MappingEndEvent; -import org.yaml.snakeyaml.events.MappingStartEvent; -import org.yaml.snakeyaml.events.ScalarEvent; -import org.yaml.snakeyaml.events.SequenceEndEvent; -import org.yaml.snakeyaml.events.SequenceStartEvent; -import org.yaml.snakeyaml.events.StreamEndEvent; -import org.yaml.snakeyaml.events.StreamStartEvent; - -import static org.jruby.runtime.Visibility.*; +import org.snakeyaml.engine.v2.api.DumpSettings; +import org.snakeyaml.engine.v2.api.DumpSettingsBuilder; +import org.snakeyaml.engine.v2.api.StreamDataWriter; +import org.snakeyaml.engine.v2.api.YamlOutputStreamWriter; +import org.snakeyaml.engine.v2.common.Anchor; +import org.snakeyaml.engine.v2.common.FlowStyle; +import org.snakeyaml.engine.v2.common.ScalarStyle; +import org.snakeyaml.engine.v2.common.SpecVersion; +import org.snakeyaml.engine.v2.emitter.Emitter; +import org.snakeyaml.engine.v2.events.AliasEvent; +import org.snakeyaml.engine.v2.events.DocumentEndEvent; +import org.snakeyaml.engine.v2.events.DocumentStartEvent; +import org.snakeyaml.engine.v2.events.Event; +import org.snakeyaml.engine.v2.events.ImplicitTuple; +import org.snakeyaml.engine.v2.events.MappingEndEvent; +import org.snakeyaml.engine.v2.events.MappingStartEvent; +import 
org.snakeyaml.engine.v2.events.ScalarEvent; +import org.snakeyaml.engine.v2.events.SequenceEndEvent; +import org.snakeyaml.engine.v2.events.SequenceStartEvent; +import org.snakeyaml.engine.v2.events.StreamEndEvent; +import org.snakeyaml.engine.v2.events.StreamStartEvent; +import org.snakeyaml.engine.v2.exceptions.EmitterException; +import org.snakeyaml.engine.v2.exceptions.Mark; + +import java.io.IOException; +import java.nio.charset.Charset; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +import static org.jruby.runtime.Visibility.PRIVATE; public class PsychEmitter extends RubyObject { public static void initPsychEmitter(Ruby runtime, RubyModule psych) { @@ -84,8 +88,7 @@ public class PsychEmitter extends RubyObject { @JRubyMethod(visibility = PRIVATE) public IRubyObject initialize(ThreadContext context, IRubyObject io) { - options = new DumperOptions(); - options.setIndent(2); + dumpSettingsBuilder.setIndent(2); this.io = io; @@ -98,10 +101,8 @@ public class PsychEmitter extends RubyObject { IRubyObject canonical = rbOptions.callMethod(context, "canonical"); IRubyObject level = rbOptions.callMethod(context, "indentation"); - options = new DumperOptions(); - - options.setCanonical(canonical.isTrue()); - options.setIndent((int)level.convertToInteger().getLongValue()); + dumpSettingsBuilder.setCanonical(canonical.isTrue()); + dumpSettingsBuilder.setIndent((int)level.convertToInteger().getLongValue()); line_width_set(context, width); this.io = io; @@ -133,9 +134,7 @@ public class PsychEmitter extends RubyObject { public IRubyObject start_document(ThreadContext context, IRubyObject _version, IRubyObject tags, IRubyObject implicit) { Ruby runtime = context.runtime; - DumperOptions.Version version = null; boolean implicitBool = implicit.isTrue(); - Map tagsMap = null; RubyClass arrayClass = runtime.getArray(); TypeConverter.checkType(context, _version, arrayClass); @@ -145,18 +144,14 @@ public class PsychEmitter extends RubyObject { int 
versionInt0 = versionAry.eltInternal(0).convertToInteger().getIntValue(); int versionInt1 = versionAry.eltInternal(1).convertToInteger().getIntValue(); - if (versionInt0 == 1) { - if (versionInt1 == 0) { - version = DumperOptions.Version.V1_0; - } else if (versionInt1 == 1) { - version = DumperOptions.Version.V1_1; - } - } - if (version == null) { - throw runtime.newArgumentError("invalid YAML version: " + versionAry); + if (versionInt0 != 1 || versionInt1 != 2) { +// throw runtime.newArgumentError("invalid YAML version: " + versionAry); } } + SpecVersion version = new SpecVersion(1, 2); + Map tagsMap = new HashMap<>(); + if (!tags.isNil()) { TypeConverter.checkType(context, tags, arrayClass); @@ -177,14 +172,14 @@ public class PsychEmitter extends RubyObject { } } - DocumentStartEvent event = new DocumentStartEvent(NULL_MARK, NULL_MARK, !implicitBool, version, tagsMap); + DocumentStartEvent event = new DocumentStartEvent(!implicitBool, Optional.ofNullable(version), tagsMap, NULL_MARK, NULL_MARK); emit(context, event); return this; } @JRubyMethod public IRubyObject end_document(ThreadContext context, IRubyObject implicit) { - DocumentEndEvent event = new DocumentEndEvent(NULL_MARK, NULL_MARK, !implicit.isTrue()); + DocumentEndEvent event = new DocumentEndEvent(!implicit.isTrue(), NULL_MARK, NULL_MARK); emit(context, event); return this; } @@ -206,17 +201,17 @@ public class PsychEmitter extends RubyObject { valueStr = EncodingUtils.strConvEnc(context, valueStr, valueStr.getEncoding(), UTF8Encoding.INSTANCE); - RubyString anchorStr = exportToUTF8(context, anchor, stringClass); - RubyString tagStr = exportToUTF8(context, tag, stringClass); + String anchorStr = exportToUTF8(context, anchor, stringClass); + String tagStr = exportToUTF8(context, tag, stringClass); ScalarEvent event = new ScalarEvent( - anchorStr == null ? null : anchorStr.asJavaString(), - tagStr == null ? null : tagStr.asJavaString(), + Optional.ofNullable(anchorStr == null ? 
null : new Anchor(anchorStr)), + Optional.ofNullable(tagStr), new ImplicitTuple(plain.isTrue(), quoted.isTrue()), valueStr.asJavaString(), + SCALAR_STYLES[style.convertToInteger().getIntValue()], NULL_MARK, - NULL_MARK, - SCALAR_STYLES[style.convertToInteger().getIntValue()]); + NULL_MARK); emit(context, event); @@ -232,16 +227,16 @@ public class PsychEmitter extends RubyObject { RubyClass stringClass = context.runtime.getString(); - RubyString anchorStr = exportToUTF8(context, anchor, stringClass); - RubyString tagStr = exportToUTF8(context, tag, stringClass); + String anchorStr = exportToUTF8(context, anchor, stringClass); + String tagStr = exportToUTF8(context, tag, stringClass); SequenceStartEvent event = new SequenceStartEvent( - anchorStr == null ? null : anchorStr.asJavaString(), - tagStr == null ? null : tagStr.asJavaString(), + Optional.ofNullable(anchorStr == null ? null : new Anchor(anchorStr)), + Optional.ofNullable(tagStr), implicit.isTrue(), + FLOW_STYLES[style.convertToInteger().getIntValue()], NULL_MARK, - NULL_MARK, - FLOW_STYLES[style.convertToInteger().getIntValue()]); + NULL_MARK); emit(context, event); return this; } @@ -262,16 +257,16 @@ public class PsychEmitter extends RubyObject { RubyClass stringClass = context.runtime.getString(); - RubyString anchorStr = exportToUTF8(context, anchor, stringClass); - RubyString tagStr = exportToUTF8(context, tag, stringClass); + String anchorStr = exportToUTF8(context, anchor, stringClass); + String tagStr = exportToUTF8(context, tag, stringClass); MappingStartEvent event = new MappingStartEvent( - anchorStr == null ? null : anchorStr.asJavaString(), - tagStr == null ? null : tagStr.asJavaString(), + Optional.ofNullable(anchorStr == null ? 
null : new Anchor(anchorStr)), + Optional.ofNullable(tagStr), implicit.isTrue(), + FLOW_STYLES[style.convertToInteger().getIntValue()], NULL_MARK, - NULL_MARK, - FLOW_STYLES[style.convertToInteger().getIntValue()]); + NULL_MARK); emit(context, event); @@ -289,9 +284,9 @@ public class PsychEmitter extends RubyObject { public IRubyObject alias(ThreadContext context, IRubyObject anchor) { RubyClass stringClass = context.runtime.getString(); - RubyString anchorStr = exportToUTF8(context, anchor, stringClass); + String anchorStr = exportToUTF8(context, anchor, stringClass); - AliasEvent event = new AliasEvent(anchorStr.asJavaString(), NULL_MARK, NULL_MARK); + AliasEvent event = new AliasEvent(Optional.of(new Anchor(anchorStr)), NULL_MARK, NULL_MARK); emit(context, event); return this; } @@ -299,40 +294,40 @@ public class PsychEmitter extends RubyObject { @JRubyMethod(name = "canonical=") public IRubyObject canonical_set(ThreadContext context, IRubyObject canonical) { // TODO: unclear if this affects a running emitter - options.setCanonical(canonical.isTrue()); + dumpSettingsBuilder.setCanonical(canonical.isTrue()); return canonical; } @JRubyMethod public IRubyObject canonical(ThreadContext context) { // TODO: unclear if this affects a running emitter - return RubyBoolean.newBoolean(context, options.isCanonical()); + return RubyBoolean.newBoolean(context, buildDumpSettings().isCanonical()); } @JRubyMethod(name = "indentation=") public IRubyObject indentation_set(ThreadContext context, IRubyObject level) { // TODO: unclear if this affects a running emitter - options.setIndent((int)level.convertToInteger().getLongValue()); + dumpSettingsBuilder.setIndent(level.convertToInteger().getIntValue()); return level; } @JRubyMethod public IRubyObject indentation(ThreadContext context) { // TODO: unclear if this affects a running emitter - return context.runtime.newFixnum(options.getIndent()); + return context.runtime.newFixnum(buildDumpSettings().getIndent()); } @JRubyMethod(name = 
"line_width=") public IRubyObject line_width_set(ThreadContext context, IRubyObject width) { - int newWidth = (int)width.convertToInteger().getLongValue(); + int newWidth = width.convertToInteger().getIntValue(); if (newWidth <= 0) newWidth = Integer.MAX_VALUE; - options.setWidth(newWidth); + dumpSettingsBuilder.setWidth(newWidth); return width; } @JRubyMethod public IRubyObject line_width(ThreadContext context) { - return context.runtime.newFixnum(options.getWidth()); + return context.runtime.newFixnum(buildDumpSettings().getWidth()); } private void emit(ThreadContext context, Event event) { @@ -343,8 +338,6 @@ public class PsychEmitter extends RubyObject { // flush writer after each emit writer.flush(); - } catch (IOException ioe) { - throw context.runtime.newIOErrorFromException(ioe); } catch (EmitterException ee) { throw context.runtime.newRuntimeError(ee.toString()); } @@ -356,41 +349,53 @@ public class PsychEmitter extends RubyObject { Encoding encoding = PsychLibrary.YAMLEncoding.values()[(int)_encoding.convertToInteger().getLongValue()].encoding; Charset charset = context.runtime.getEncodingService().charsetForEncoding(encoding); - writer = new OutputStreamWriter(new IOOutputStream(io, encoding), charset); - emitter = new Emitter(writer, options); + writer = new YamlOutputStreamWriter(new IOOutputStream(io, encoding), charset) { + @Override + public void processIOException(IOException ioe) { + throw context.runtime.newIOErrorFromException(ioe); + } + }; + emitter = new Emitter(buildDumpSettings(), writer); + } + + private DumpSettings buildDumpSettings() { + return dumpSettingsBuilder.build(); } - private RubyString exportToUTF8(ThreadContext context, IRubyObject tag, RubyClass stringClass) { - RubyString tagStr = null; - if (!tag.isNil()) { - TypeConverter.checkType(context, tag, stringClass); - tagStr = (RubyString) tag; - tagStr = EncodingUtils.strConvEnc(context, tagStr, tagStr.getEncoding(), UTF8Encoding.INSTANCE); + private String 
exportToUTF8(ThreadContext context, IRubyObject tag, RubyClass stringClass) { + if (tag.isNil()) { + return null; } - return tagStr; + + RubyString tagStr; + + TypeConverter.checkType(context, tag, stringClass); + tagStr = (RubyString) tag; + + return EncodingUtils.strConvEnc(context, tagStr, tagStr.getEncoding(), UTF8Encoding.INSTANCE).asJavaString(); } Emitter emitter; - Writer writer; - DumperOptions options = new DumperOptions(); + StreamDataWriter writer; + final DumpSettingsBuilder dumpSettingsBuilder = DumpSettings.builder(); IRubyObject io; - private static final Mark NULL_MARK = new Mark("", 0, 0, 0, new int[0], 0); + private static final Optional NULL_MARK = Optional.empty(); // Map style constants from Psych values (ANY = 0 ... FOLDED = 5) // to SnakeYaml values; see psych/nodes/scalar.rb. - private static final DumperOptions.ScalarStyle[] SCALAR_STYLES = { - DumperOptions.ScalarStyle.PLAIN, // ANY - DumperOptions.ScalarStyle.PLAIN, - DumperOptions.ScalarStyle.SINGLE_QUOTED, - DumperOptions.ScalarStyle.DOUBLE_QUOTED, - DumperOptions.ScalarStyle.LITERAL, - DumperOptions.ScalarStyle.FOLDED + private static final ScalarStyle[] SCALAR_STYLES = { + ScalarStyle.PLAIN, // ANY + ScalarStyle.PLAIN, + ScalarStyle.SINGLE_QUOTED, + ScalarStyle.DOUBLE_QUOTED, + ScalarStyle.LITERAL, + ScalarStyle.FOLDED }; - private static final DumperOptions.FlowStyle[] FLOW_STYLES = { - DumperOptions.FlowStyle.AUTO, - DumperOptions.FlowStyle.BLOCK, - DumperOptions.FlowStyle.FLOW + private static final FlowStyle[] FLOW_STYLES = { + FlowStyle.AUTO, + FlowStyle.BLOCK, + FlowStyle.FLOW }; } diff --git a/ext/java/org/jruby/ext/psych/PsychLibrary.java b/ext/java/org/jruby/ext/psych/PsychLibrary.java index 5141ea6..d80282f 100644 --- a/ext/java/org/jruby/ext/psych/PsychLibrary.java +++ b/ext/java/org/jruby/ext/psych/PsychLibrary.java @@ -27,10 +27,6 @@ ***** END LICENSE BLOCK *****/ package org.jruby.ext.psych; -import java.io.InputStream; -import java.io.IOException; -import 
java.util.Properties; - import org.jcodings.Encoding; import org.jcodings.specific.UTF16BEEncoding; import org.jcodings.specific.UTF16LEEncoding; @@ -44,7 +40,10 @@ import org.jruby.runtime.ThreadContext; import org.jruby.runtime.Visibility; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.runtime.load.Library; -import org.yaml.snakeyaml.error.Mark; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; public class PsychLibrary implements Library { private static final String DUMMY_VERSION = "0.0"; @@ -54,7 +53,7 @@ public class PsychLibrary implements Library { // load version from properties packed with the jar Properties props = new Properties(); - try( InputStream is = runtime.getJRubyClassLoader().getResourceAsStream("META-INF/maven/org.yaml/snakeyaml/pom.properties") ) { + try( InputStream is = runtime.getJRubyClassLoader().getResourceAsStream("META-INF/maven/org.snakeyaml/snakeyaml-engine/pom.properties") ) { props.load(is); } catch( IOException e ) { @@ -66,27 +65,6 @@ public class PsychLibrary implements Library { snakeyamlVersion = snakeyamlVersion.substring(0, snakeyamlVersion.length() - "-SNAPSHOT".length()); } - // Try to determine if we have a new enough SnakeYAML. - // Versions before 1.21 removed a Mark constructor that JRuby uses. 
- // See https://github.com/bundler/bundler/issues/6878 - if (snakeyamlVersion.equals(DUMMY_VERSION)) { - try { - // Use reflection to try to confirm we have a new enough version - Mark.class.getConstructor(String.class, int.class, int.class, int.class, int[].class, int.class); - } catch (NoSuchMethodException nsme) { - throw runtime.newLoadError("bad SnakeYAML version, required 1.21 or higher; check your CLASSPATH for a conflicting jar"); - } - } else { - // Parse version string to check for 1.21+ - String[] majorMinor = snakeyamlVersion.split("\\."); - - if (majorMinor.length < 2 || Integer.parseInt(majorMinor[0]) < 1 || Integer.parseInt(majorMinor[1]) < 21) { - throw runtime.newLoadError( - "bad SnakeYAML version " + snakeyamlVersion + - ", required 1.21 or higher; check your CLASSPATH for a conflicting jar"); - } - } - RubyString version = runtime.newString(snakeyamlVersion + ".0"); version.setFrozen(true); psych.setConstant("SNAKEYAML_VERSION", version); diff --git a/ext/java/org/jruby/ext/psych/PsychParser.java b/ext/java/org/jruby/ext/psych/PsychParser.java index c4280d4..ccd843b 100644 --- a/ext/java/org/jruby/ext/psych/PsychParser.java +++ b/ext/java/org/jruby/ext/psych/PsychParser.java @@ -27,15 +27,6 @@ ***** END LICENSE BLOCK *****/ package org.jruby.ext.psych; -import java.io.ByteArrayInputStream; -import java.io.InputStreamReader; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CodingErrorAction; -import java.nio.charset.MalformedInputException; -import java.util.Arrays; -import java.util.Map; - import org.jcodings.Encoding; import org.jcodings.specific.UTF16BEEncoding; import org.jcodings.specific.UTF16LEEncoding; @@ -52,48 +43,58 @@ import org.jruby.RubyModule; import org.jruby.RubyObject; import org.jruby.RubyString; import org.jruby.anno.JRubyMethod; -import static org.jruby.ext.psych.PsychLibrary.YAMLEncoding.*; import org.jruby.runtime.Block; import org.jruby.runtime.Helpers; import 
org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.runtime.callsite.CachingCallSite; import org.jruby.runtime.callsite.FunctionalCachingCallSite; +import org.jruby.util.ByteList; import org.jruby.util.IOInputStream; import org.jruby.util.io.EncodingUtils; -import org.yaml.snakeyaml.DumperOptions; -import org.yaml.snakeyaml.error.Mark; -import org.yaml.snakeyaml.error.MarkedYAMLException; -import org.yaml.snakeyaml.error.YAMLException; -import org.yaml.snakeyaml.events.AliasEvent; -import org.yaml.snakeyaml.events.DocumentEndEvent; -import org.yaml.snakeyaml.events.DocumentStartEvent; -import org.yaml.snakeyaml.events.Event; -import org.yaml.snakeyaml.events.Event.ID; -import org.yaml.snakeyaml.events.MappingStartEvent; -import org.yaml.snakeyaml.events.ScalarEvent; -import org.yaml.snakeyaml.events.SequenceStartEvent; -import org.yaml.snakeyaml.parser.Parser; -import org.yaml.snakeyaml.parser.ParserException; -import org.yaml.snakeyaml.parser.ParserImpl; -import org.yaml.snakeyaml.reader.ReaderException; -import org.yaml.snakeyaml.reader.StreamReader; -import org.yaml.snakeyaml.scanner.ScannerException; +import org.snakeyaml.engine.v2.api.LoadSettings; +import org.snakeyaml.engine.v2.api.LoadSettingsBuilder; +import org.snakeyaml.engine.v2.common.Anchor; +import org.snakeyaml.engine.v2.common.FlowStyle; +import org.snakeyaml.engine.v2.common.ScalarStyle; +import org.snakeyaml.engine.v2.common.SpecVersion; +import org.snakeyaml.engine.v2.events.AliasEvent; +import org.snakeyaml.engine.v2.events.DocumentEndEvent; +import org.snakeyaml.engine.v2.events.DocumentStartEvent; +import org.snakeyaml.engine.v2.events.Event; +import org.snakeyaml.engine.v2.events.MappingStartEvent; +import org.snakeyaml.engine.v2.events.ScalarEvent; +import org.snakeyaml.engine.v2.events.SequenceStartEvent; +import org.snakeyaml.engine.v2.exceptions.Mark; +import org.snakeyaml.engine.v2.exceptions.MarkedYamlEngineException; +import 
org.snakeyaml.engine.v2.exceptions.ParserException; +import org.snakeyaml.engine.v2.exceptions.ReaderException; +import org.snakeyaml.engine.v2.exceptions.ScannerException; +import org.snakeyaml.engine.v2.exceptions.YamlEngineException; +import org.snakeyaml.engine.v2.parser.Parser; +import org.snakeyaml.engine.v2.parser.ParserImpl; +import org.snakeyaml.engine.v2.scanner.ScannerImpl; +import org.snakeyaml.engine.v2.scanner.StreamReader; +import org.snakeyaml.engine.v2.schema.CoreSchema; +import java.io.ByteArrayInputStream; +import java.io.InputStreamReader; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.MalformedInputException; +import java.util.Arrays; +import java.util.Map; +import java.util.Optional; + +import static org.jruby.ext.psych.PsychLibrary.YAMLEncoding.*; import static org.jruby.runtime.Helpers.arrayOf; import static org.jruby.runtime.Helpers.invoke; -import org.jruby.util.ByteList; public class PsychParser extends RubyObject { public static final String JRUBY_CALL_SITES = "_jruby_call_sites"; - private enum Call { - path, event_location, start_stream, start_document, end_document, alias, scalar, start_sequence, end_sequence, start_mapping, end_mapping, end_stream - } - - final CachingCallSite[] sites; - public static void initPsychParser(Ruby runtime, RubyModule psych) { RubyClass psychParser = runtime.defineClassUnder("Parser", runtime.getObject(), PsychParser::new, psych); @@ -116,15 +117,22 @@ public class PsychParser extends RubyObject { super(runtime, klass); this.sites = (CachingCallSite[]) klass.getInternalVariable(JRUBY_CALL_SITES); + this.loadSettingsBuilder = LoadSettings.builder().setSchema(new CoreSchema()); } - private IRubyObject stringOrNilFor(ThreadContext context, String value) { - if (value == null) return context.nil; + private IRubyObject stringOrNilForAnchor(ThreadContext context, Optional value) { + if (!value.isPresent()) return 
context.nil; - return stringFor(context, value); + return stringFor(context, value.get().getValue()); + } + + private IRubyObject stringOrNilFor(ThreadContext context, Optional value) { + if (!value.isPresent()) return context.nil; + + return stringFor(context, value.get()); } - private RubyString stringFor(ThreadContext context, String value) { + private IRubyObject stringFor(ThreadContext context, String value) { Ruby runtime = context.runtime; Encoding encoding = runtime.getDefaultInternalEncoding(); @@ -143,16 +151,16 @@ public class PsychParser extends RubyObject { return string; } - private StreamReader readerFor(ThreadContext context, IRubyObject yaml) { + private StreamReader readerFor(ThreadContext context, IRubyObject yaml, LoadSettings loadSettings) { if (yaml instanceof RubyString) { - return readerForString(context, (RubyString) yaml); + return readerForString(context, (RubyString) yaml, loadSettings); } // fall back on IOInputStream, using default charset - return readerForIO(context, yaml); + return readerForIO(context, yaml, loadSettings); } - private static StreamReader readerForIO(ThreadContext context, IRubyObject yaml) { + private static StreamReader readerForIO(ThreadContext context, IRubyObject yaml, LoadSettings loadSettings) { boolean isIO = yaml instanceof RubyIO; if (isIO || yaml.respondsTo("read")) { // default to UTF8 unless RubyIO has UTF16 as encoding @@ -170,7 +178,7 @@ public class PsychParser extends RubyObject { CharsetDecoder decoder = charset.newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPORT); - return new StreamReader(new InputStreamReader(new IOInputStream(yaml), decoder)); + return new StreamReader(loadSettings, new InputStreamReader(new IOInputStream(yaml), decoder)); } else { Ruby runtime = context.runtime; @@ -178,7 +186,7 @@ public class PsychParser extends RubyObject { } } - private static StreamReader readerForString(ThreadContext context, RubyString string) { + private static StreamReader 
readerForString(ThreadContext context, RubyString string, LoadSettings loadSettings) { ByteList byteList = string.getByteList(); Encoding enc = byteList.getEncoding(); @@ -196,7 +204,7 @@ public class PsychParser extends RubyObject { InputStreamReader isr = new InputStreamReader(bais, charset); - return new StreamReader(isr); + return new StreamReader(loadSettings, isr); } @JRubyMethod(name = "_native_parse") @@ -204,52 +212,61 @@ public class PsychParser extends RubyObject { Ruby runtime = context.runtime; try { - parser = new ParserImpl(readerFor(context, yaml)); + LoadSettings loadSettings = loadSettingsBuilder.build(); + parser = new ParserImpl(loadSettings, new ScannerImpl(loadSettings, readerFor(context, yaml, loadSettings))); if (path.isNil() && yaml.respondsTo("path")) { path = sites[Call.path.ordinal()].call(context, this, yaml); } - while (true) { - event = parser.getEvent(); + while (parser.hasNext()) { + event = parser.next(); - Mark start = event.getStartMark(); + Mark start = event.getStartMark().orElseThrow(RuntimeException::new); IRubyObject start_line = runtime.newFixnum(start.getLine()); IRubyObject start_column = runtime.newFixnum(start.getColumn()); - Mark end = event.getEndMark(); + Mark end = event.getEndMark().orElseThrow(RuntimeException::new); IRubyObject end_line = runtime.newFixnum(end.getLine()); IRubyObject end_column = runtime.newFixnum(end.getColumn()); sites[Call.event_location.ordinal()].call(context, this, handler, start_line, start_column, end_line, end_column); - // FIXME: Event should expose a getID, so it can be switched - if (event.is(ID.StreamStart)) { - sites[Call.start_stream.ordinal()].call(context, this, handler, runtime.newFixnum(YAML_ANY_ENCODING.ordinal())); - } else if (event.is(ID.DocumentStart)) { - handleDocumentStart(context, (DocumentStartEvent) event, handler); - } else if (event.is(ID.DocumentEnd)) { - IRubyObject notExplicit = runtime.newBoolean(!((DocumentEndEvent) event).getExplicit()); - - 
sites[Call.end_document.ordinal()].call(context, this, handler, notExplicit); - } else if (event.is(ID.Alias)) { - IRubyObject alias = stringOrNilFor(context, ((AliasEvent)event).getAnchor()); - - sites[Call.alias.ordinal()].call(context, this, handler, alias); - } else if (event.is(ID.Scalar)) { - handleScalar(context, (ScalarEvent) event, handler); - } else if (event.is(ID.SequenceStart)) { - handleSequenceStart(context, (SequenceStartEvent) event, handler); - } else if (event.is(ID.SequenceEnd)) { - sites[Call.end_sequence.ordinal()].call(context, this, handler); - } else if (event.is(ID.MappingStart)) { - handleMappingStart(context, (MappingStartEvent) event, handler); - } else if (event.is(ID.MappingEnd)) { - sites[Call.end_mapping.ordinal()].call(context, this, handler); - } else if (event.is(ID.StreamEnd)) { - sites[Call.end_stream.ordinal()].call(context, this, handler); - - break; + switch (event.getEventId()) { + case StreamStart: + sites[Call.start_stream.ordinal()].call(context, this, handler, runtime.newFixnum(YAML_ANY_ENCODING.ordinal())); + break; + case DocumentStart: + handleDocumentStart(context, (DocumentStartEvent) event, handler); + break; + case DocumentEnd: + IRubyObject notExplicit = runtime.newBoolean(!((DocumentEndEvent) event).isExplicit()); + + sites[Call.end_document.ordinal()].call(context, this, handler, notExplicit); + break; + case Alias: + IRubyObject alias = stringOrNilForAnchor(context, ((AliasEvent) event).getAnchor()); + + sites[Call.alias.ordinal()].call(context, this, handler, alias); + break; + case Scalar: + handleScalar(context, (ScalarEvent) event, handler); + break; + case SequenceStart: + handleSequenceStart(context, (SequenceStartEvent) event, handler); + break; + case SequenceEnd: + sites[Call.end_sequence.ordinal()].call(context, this, handler); + break; + case MappingStart: + handleMappingStart(context, (MappingStartEvent) event, handler); + break; + case MappingEnd: + sites[Call.end_mapping.ordinal()].call(context, 
this, handler); + break; + case StreamEnd: + sites[Call.end_stream.ordinal()].call(context, this, handler); + break; } } } catch (ParserException pe) { @@ -268,7 +285,7 @@ public class PsychParser extends RubyObject { parser = null; raiseParserException(context, re, path); - } catch (YAMLException ye) { + } catch (YamlEngineException ye) { Throwable cause = ye.getCause(); if (cause instanceof MalformedInputException) { @@ -288,11 +305,11 @@ public class PsychParser extends RubyObject { private void handleDocumentStart(ThreadContext context, DocumentStartEvent dse, IRubyObject handler) { Ruby runtime = context.runtime; - DumperOptions.Version _version = dse.getVersion(); + SpecVersion _version = dse.getSpecVersion().orElse(new SpecVersion(1, 2)); IRubyObject version = _version == null ? RubyArray.newArray(runtime) : - RubyArray.newArray(runtime, runtime.newFixnum(_version.major()), runtime.newFixnum(_version.minor())); - + RubyArray.newArray(runtime, runtime.newFixnum(_version.getMajor()), runtime.newFixnum(_version.getMinor())); + Map tagsMap = dse.getTags(); RubyArray tags = RubyArray.newArray(runtime); if (tagsMap != null && tagsMap.size() > 0) { @@ -303,16 +320,16 @@ public class PsychParser extends RubyObject { tags.append(RubyArray.newArray(runtime, key, value)); } } - IRubyObject notExplicit = runtime.newBoolean(!dse.getExplicit()); + IRubyObject notExplicit = runtime.newBoolean(!dse.isExplicit()); invoke(context, handler, "start_document", version, tags, notExplicit); } private void handleMappingStart(ThreadContext context, MappingStartEvent mse, IRubyObject handler) { Ruby runtime = context.runtime; - IRubyObject anchor = stringOrNilFor(context, mse.getAnchor()); + IRubyObject anchor = stringOrNilForAnchor(context, mse.getAnchor()); IRubyObject tag = stringOrNilFor(context, mse.getTag()); - IRubyObject implicit = runtime.newBoolean(mse.getImplicit()); + IRubyObject implicit = runtime.newBoolean(mse.isImplicit()); IRubyObject style = 
runtime.newFixnum(translateFlowStyle(mse.getFlowStyle())); sites[Call.start_mapping.ordinal()].call(context, this, handler, anchor, tag, implicit, style); @@ -321,7 +338,7 @@ public class PsychParser extends RubyObject { private void handleScalar(ThreadContext context, ScalarEvent se, IRubyObject handler) { Ruby runtime = context.runtime; - IRubyObject anchor = stringOrNilFor(context, se.getAnchor()); + IRubyObject anchor = stringOrNilForAnchor(context, se.getAnchor()); IRubyObject tag = stringOrNilFor(context, se.getTag()); IRubyObject plain_implicit = runtime.newBoolean(se.getImplicit().canOmitTagInPlainScalar()); IRubyObject quoted_implicit = runtime.newBoolean(se.getImplicit().canOmitTagInNonPlainScalar()); @@ -334,9 +351,9 @@ public class PsychParser extends RubyObject { private void handleSequenceStart(ThreadContext context, SequenceStartEvent sse, IRubyObject handler) { Ruby runtime = context.runtime; - IRubyObject anchor = stringOrNilFor(context, sse.getAnchor()); + IRubyObject anchor = stringOrNilForAnchor(context, sse.getAnchor()); IRubyObject tag = stringOrNilFor(context, sse.getTag()); - IRubyObject implicit = runtime.newBoolean(sse.getImplicit()); + IRubyObject implicit = runtime.newBoolean(sse.isImplicit()); IRubyObject style = runtime.newFixnum(translateFlowStyle(sse.getFlowStyle())); sites[Call.start_sequence.ordinal()].call(context, this, handler, anchor, tag, implicit, style); @@ -364,7 +381,7 @@ public class PsychParser extends RubyObject { RubyKernel.raise(context, runtime.getKernel(), new IRubyObject[] { exception }, Block.NULL_BLOCK); } - private static void raiseParserException(ThreadContext context, MarkedYAMLException mye, IRubyObject rbPath) { + private static void raiseParserException(ThreadContext context, MarkedYamlEngineException mye, IRubyObject rbPath) { Ruby runtime; Mark mark; RubyClass se; @@ -373,7 +390,7 @@ public class PsychParser extends RubyObject { runtime = context.runtime; se = 
(RubyClass)runtime.getModule("Psych").getConstant("SyntaxError"); - mark = mye.getProblemMark(); + mark = mye.getProblemMark().get(); exception = se.newInstance(context, new IRubyObject[] { @@ -413,7 +430,7 @@ public class PsychParser extends RubyObject { RubyKernel.raise(context, runtime.getKernel(), new IRubyObject[] { exception }, Block.NULL_BLOCK); } - private static int translateStyle(DumperOptions.ScalarStyle style) { + private static int translateStyle(ScalarStyle style) { if (style == null) return 0; // any switch (style) { @@ -426,7 +443,7 @@ public class PsychParser extends RubyObject { } } - private static int translateFlowStyle(DumperOptions.FlowStyle flowStyle) { + private static int translateFlowStyle(FlowStyle flowStyle) { switch (flowStyle) { case AUTO: return 0; case BLOCK: return 1; @@ -442,9 +459,11 @@ public class PsychParser extends RubyObject { Event event = null; if (parser != null) { - event = parser.peekEvent(); - - if (event == null) event = this.event; + if (parser.hasNext()) { + event = parser.peekEvent(); + } else { + event = this.event; + } } if (event == null) { @@ -457,7 +476,7 @@ public class PsychParser extends RubyObject { ); } - Mark mark = event.getStartMark(); + Mark mark = event.getStartMark().orElseThrow(RuntimeException::new); return ((RubyClass)context.runtime.getClassFromPath("Psych::Parser::Mark")).newInstance( context, @@ -468,6 +487,17 @@ public class PsychParser extends RubyObject { ); } + private LoadSettings buildSettings() { + return loadSettingsBuilder.build(); + } + private Parser parser; private Event event; + private final LoadSettingsBuilder loadSettingsBuilder; + + private enum Call { + path, event_location, start_stream, start_document, end_document, alias, scalar, start_sequence, end_sequence, start_mapping, end_mapping, end_stream + } + + final CachingCallSite[] sites; } diff --git a/ext/java/org/jruby/ext/psych/PsychToRuby.java b/ext/java/org/jruby/ext/psych/PsychToRuby.java index 8286837..510e353 100644 
--- a/ext/java/org/jruby/ext/psych/PsychToRuby.java +++ b/ext/java/org/jruby/ext/psych/PsychToRuby.java @@ -29,14 +29,15 @@ package org.jruby.ext.psych; import org.jruby.Ruby; import org.jruby.RubyClass; +import org.jruby.RubyException; import org.jruby.RubyModule; import org.jruby.RubyObject; -import org.jruby.RubyException; import org.jruby.anno.JRubyMethod; import org.jruby.exceptions.RaiseException; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; -import static org.jruby.runtime.Visibility.*; + +import static org.jruby.runtime.Visibility.PRIVATE; public class PsychToRuby { public static void initPsychToRuby(Ruby runtime, RubyModule psych) { -- cgit v1.2.1 From d772d8a94d4400c88f06f757fc448d40755c09a3 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Fri, 13 Jan 2023 08:47:23 -0600 Subject: Omit spec version from document start This eliminates the %YAML 1.2 directive at the start of each emit, which improves tests passing but also breaks a few tests that *expect* the YAML directive to be present. 
--- ext/java/org/jruby/ext/psych/PsychEmitter.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'ext') diff --git a/ext/java/org/jruby/ext/psych/PsychEmitter.java b/ext/java/org/jruby/ext/psych/PsychEmitter.java index 5e680d1..2ad1251 100644 --- a/ext/java/org/jruby/ext/psych/PsychEmitter.java +++ b/ext/java/org/jruby/ext/psych/PsychEmitter.java @@ -149,7 +149,6 @@ public class PsychEmitter extends RubyObject { } } - SpecVersion version = new SpecVersion(1, 2); Map tagsMap = new HashMap<>(); if (!tags.isNil()) { @@ -172,7 +171,7 @@ public class PsychEmitter extends RubyObject { } } - DocumentStartEvent event = new DocumentStartEvent(!implicitBool, Optional.ofNullable(version), tagsMap, NULL_MARK, NULL_MARK); + DocumentStartEvent event = new DocumentStartEvent(!implicitBool, Optional.empty(), tagsMap, NULL_MARK, NULL_MARK); emit(context, event); return this; } -- cgit v1.2.1 From 63428e345c6255a37a59c258dc0b7d1269ccf5f5 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Fri, 13 Jan 2023 09:50:17 -0600 Subject: Fix up spec version handling --- ext/java/org/jruby/ext/psych/PsychEmitter.java | 11 ++++++++--- ext/java/org/jruby/ext/psych/PsychParser.java | 9 +++++---- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'ext') diff --git a/ext/java/org/jruby/ext/psych/PsychEmitter.java b/ext/java/org/jruby/ext/psych/PsychEmitter.java index 2ad1251..9180dcc 100644 --- a/ext/java/org/jruby/ext/psych/PsychEmitter.java +++ b/ext/java/org/jruby/ext/psych/PsychEmitter.java @@ -140,13 +140,18 @@ public class PsychEmitter extends RubyObject { TypeConverter.checkType(context, _version, arrayClass); RubyArray versionAry = _version.convertToArray(); + Optional specVersion; if (versionAry.size() == 2) { int versionInt0 = versionAry.eltInternal(0).convertToInteger().getIntValue(); int versionInt1 = versionAry.eltInternal(1).convertToInteger().getIntValue(); - if (versionInt0 != 1 || versionInt1 != 2) { -// throw 
runtime.newArgumentError("invalid YAML version: " + versionAry); + if (versionInt0 != 1) { + throw runtime.newArgumentError("invalid YAML version: " + versionAry); } + + specVersion = Optional.of(new SpecVersion(versionInt0, versionInt1)); + } else { + specVersion = Optional.empty(); } Map tagsMap = new HashMap<>(); @@ -171,7 +176,7 @@ public class PsychEmitter extends RubyObject { } } - DocumentStartEvent event = new DocumentStartEvent(!implicitBool, Optional.empty(), tagsMap, NULL_MARK, NULL_MARK); + DocumentStartEvent event = new DocumentStartEvent(!implicitBool, specVersion, tagsMap, NULL_MARK, NULL_MARK); emit(context, event); return this; } diff --git a/ext/java/org/jruby/ext/psych/PsychParser.java b/ext/java/org/jruby/ext/psych/PsychParser.java index ccd843b..fd5e19c 100644 --- a/ext/java/org/jruby/ext/psych/PsychParser.java +++ b/ext/java/org/jruby/ext/psych/PsychParser.java @@ -305,10 +305,11 @@ public class PsychParser extends RubyObject { private void handleDocumentStart(ThreadContext context, DocumentStartEvent dse, IRubyObject handler) { Ruby runtime = context.runtime; - SpecVersion _version = dse.getSpecVersion().orElse(new SpecVersion(1, 2)); - IRubyObject version = _version == null ? - RubyArray.newArray(runtime) : - RubyArray.newArray(runtime, runtime.newFixnum(_version.getMajor()), runtime.newFixnum(_version.getMinor())); + + Optional specVersion = dse.getSpecVersion(); + IRubyObject version = specVersion.isPresent() ? 
+ RubyArray.newArray(runtime, runtime.newFixnum(specVersion.get().getMajor()), runtime.newFixnum(specVersion.get().getMinor())) : + RubyArray.newEmptyArray(runtime); Map tagsMap = dse.getTags(); RubyArray tags = RubyArray.newArray(runtime); -- cgit v1.2.1 From 69b109924a093575fd787ae46ae234c222a1d378 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 18 Jan 2023 22:32:48 -0600 Subject: Reduce hops to call sites If we could embed indy call sites here they would cache as constants; this is the best we can do at the moment. --- ext/java/org/jruby/ext/psych/PsychParser.java | 38 ++++++++++++++++++--------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'ext') diff --git a/ext/java/org/jruby/ext/psych/PsychParser.java b/ext/java/org/jruby/ext/psych/PsychParser.java index fd5e19c..85dfdec 100644 --- a/ext/java/org/jruby/ext/psych/PsychParser.java +++ b/ext/java/org/jruby/ext/psych/PsychParser.java @@ -116,7 +116,19 @@ public class PsychParser extends RubyObject { public PsychParser(Ruby runtime, RubyClass klass) { super(runtime, klass); - this.sites = (CachingCallSite[]) klass.getInternalVariable(JRUBY_CALL_SITES); + CachingCallSite[] sites = (CachingCallSite[]) klass.getInternalVariable(JRUBY_CALL_SITES); + this.path = sites[Call.path.ordinal()]; + this.event_location = sites[Call.event_location.ordinal()]; + this.start_stream = sites[Call.start_stream.ordinal()]; + this.start_document = sites[Call.start_document.ordinal()]; + this.end_document = sites[Call.end_document.ordinal()]; + this.alias = sites[Call.alias.ordinal()]; + this.scalar = sites[Call.scalar.ordinal()]; + this.start_sequence = sites[Call.start_sequence.ordinal()]; + this.end_sequence = sites[Call.end_sequence.ordinal()]; + this.start_mapping = sites[Call.start_mapping.ordinal()]; + this.end_mapping = sites[Call.end_mapping.ordinal()]; + this.end_stream = sites[Call.end_stream.ordinal()]; this.loadSettingsBuilder = LoadSettings.builder().setSchema(new CoreSchema()); } @@ 
-216,7 +228,7 @@ public class PsychParser extends RubyObject { parser = new ParserImpl(loadSettings, new ScannerImpl(loadSettings, readerFor(context, yaml, loadSettings))); if (path.isNil() && yaml.respondsTo("path")) { - path = sites[Call.path.ordinal()].call(context, this, yaml); + path = this.path.call(context, this, yaml); } while (parser.hasNext()) { @@ -230,11 +242,11 @@ public class PsychParser extends RubyObject { IRubyObject end_line = runtime.newFixnum(end.getLine()); IRubyObject end_column = runtime.newFixnum(end.getColumn()); - sites[Call.event_location.ordinal()].call(context, this, handler, start_line, start_column, end_line, end_column); + event_location.call(context, this, handler, start_line, start_column, end_line, end_column); switch (event.getEventId()) { case StreamStart: - sites[Call.start_stream.ordinal()].call(context, this, handler, runtime.newFixnum(YAML_ANY_ENCODING.ordinal())); + start_stream.call(context, this, handler, runtime.newFixnum(YAML_ANY_ENCODING.ordinal())); break; case DocumentStart: handleDocumentStart(context, (DocumentStartEvent) event, handler); @@ -242,12 +254,12 @@ public class PsychParser extends RubyObject { case DocumentEnd: IRubyObject notExplicit = runtime.newBoolean(!((DocumentEndEvent) event).isExplicit()); - sites[Call.end_document.ordinal()].call(context, this, handler, notExplicit); + end_document.call(context, this, handler, notExplicit); break; case Alias: IRubyObject alias = stringOrNilForAnchor(context, ((AliasEvent) event).getAnchor()); - sites[Call.alias.ordinal()].call(context, this, handler, alias); + this.alias.call(context, this, handler, alias); break; case Scalar: handleScalar(context, (ScalarEvent) event, handler); @@ -256,16 +268,16 @@ public class PsychParser extends RubyObject { handleSequenceStart(context, (SequenceStartEvent) event, handler); break; case SequenceEnd: - sites[Call.end_sequence.ordinal()].call(context, this, handler); + end_sequence.call(context, this, handler); break; case 
MappingStart: handleMappingStart(context, (MappingStartEvent) event, handler); break; case MappingEnd: - sites[Call.end_mapping.ordinal()].call(context, this, handler); + end_mapping.call(context, this, handler); break; case StreamEnd: - sites[Call.end_stream.ordinal()].call(context, this, handler); + end_stream.call(context, this, handler); break; } } @@ -333,7 +345,7 @@ public class PsychParser extends RubyObject { IRubyObject implicit = runtime.newBoolean(mse.isImplicit()); IRubyObject style = runtime.newFixnum(translateFlowStyle(mse.getFlowStyle())); - sites[Call.start_mapping.ordinal()].call(context, this, handler, anchor, tag, implicit, style); + start_mapping.call(context, this, handler, anchor, tag, implicit, style); } private void handleScalar(ThreadContext context, ScalarEvent se, IRubyObject handler) { @@ -346,7 +358,7 @@ public class PsychParser extends RubyObject { IRubyObject style = runtime.newFixnum(translateStyle(se.getScalarStyle())); IRubyObject val = stringFor(context, se.getValue()); - sites[Call.scalar.ordinal()].call(context, this, handler, val, anchor, tag, plain_implicit, + scalar.call(context, this, handler, val, anchor, tag, plain_implicit, quoted_implicit, style); } @@ -357,7 +369,7 @@ public class PsychParser extends RubyObject { IRubyObject implicit = runtime.newBoolean(sse.isImplicit()); IRubyObject style = runtime.newFixnum(translateFlowStyle(sse.getFlowStyle())); - sites[Call.start_sequence.ordinal()].call(context, this, handler, anchor, tag, implicit, style); + start_sequence.call(context, this, handler, anchor, tag, implicit, style); } private static void raiseParserException(ThreadContext context, ReaderException re, IRubyObject rbPath) { @@ -500,5 +512,5 @@ public class PsychParser extends RubyObject { path, event_location, start_stream, start_document, end_document, alias, scalar, start_sequence, end_sequence, start_mapping, end_mapping, end_stream } - final CachingCallSite[] sites; + private final CachingCallSite path, 
event_location, start_stream, start_document, end_document, alias, scalar, start_sequence, end_sequence, start_mapping, end_mapping, end_stream; } -- cgit v1.2.1 From b8a09c1cdf192061dbf6e256d91941ecf9e0ac99 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 18 Jan 2023 23:04:52 -0600 Subject: Misc cleanup --- ext/java/org/jruby/ext/psych/PsychParser.java | 76 ++++++++++++++++----------- 1 file changed, 45 insertions(+), 31 deletions(-) (limited to 'ext') diff --git a/ext/java/org/jruby/ext/psych/PsychParser.java b/ext/java/org/jruby/ext/psych/PsychParser.java index 85dfdec..0975f46 100644 --- a/ext/java/org/jruby/ext/psych/PsychParser.java +++ b/ext/java/org/jruby/ext/psych/PsychParser.java @@ -62,6 +62,7 @@ import org.snakeyaml.engine.v2.events.AliasEvent; import org.snakeyaml.engine.v2.events.DocumentEndEvent; import org.snakeyaml.engine.v2.events.DocumentStartEvent; import org.snakeyaml.engine.v2.events.Event; +import org.snakeyaml.engine.v2.events.ImplicitTuple; import org.snakeyaml.engine.v2.events.MappingStartEvent; import org.snakeyaml.engine.v2.events.ScalarEvent; import org.snakeyaml.engine.v2.events.SequenceStartEvent; @@ -147,17 +148,26 @@ public class PsychParser extends RubyObject { private IRubyObject stringFor(ThreadContext context, String value) { Ruby runtime = context.runtime; + boolean isUTF8 = true; + Charset charset = RubyEncoding.UTF8; + Encoding encoding = runtime.getDefaultInternalEncoding(); if (encoding == null) { encoding = UTF8Encoding.INSTANCE; + charset = RubyEncoding.UTF8; + } else { + Charset encodingCharset = encoding.getCharset(); + if (encodingCharset != null) { + isUTF8 = encodingCharset == RubyEncoding.UTF8; + charset = encodingCharset; + } } - Charset charset = RubyEncoding.UTF8; - if (encoding.getCharset() != null) { - charset = encoding.getCharset(); - } - - ByteList bytes = new ByteList(value.getBytes(charset), encoding); + ByteList bytes = new ByteList( + isUTF8 ? 
+ RubyEncoding.encodeUTF8(value) : + RubyEncoding.encode(value, charset), + encoding); RubyString string = RubyString.newString(runtime, bytes); return string; @@ -324,18 +334,23 @@ public class PsychParser extends RubyObject { RubyArray.newEmptyArray(runtime); Map tagsMap = dse.getTags(); - RubyArray tags = RubyArray.newArray(runtime); - if (tagsMap != null && tagsMap.size() > 0) { + RubyArray tags; + int size; + if (tagsMap != null && (size = tagsMap.size()) > 0) { + tags = RubyArray.newArray(runtime, size); for (Map.Entry tag : tagsMap.entrySet()) { - IRubyObject key = stringFor(context, tag.getKey()); + IRubyObject key = stringFor(context, tag.getKey()); IRubyObject value = stringFor(context, tag.getValue()); tags.append(RubyArray.newArray(runtime, key, value)); } + } else { + tags = RubyArray.newEmptyArray(runtime); } + IRubyObject notExplicit = runtime.newBoolean(!dse.isExplicit()); - invoke(context, handler, "start_document", version, tags, notExplicit); + start_document.call(context, this, handler, version, tags, notExplicit); } private void handleMappingStart(ThreadContext context, MappingStartEvent mse, IRubyObject handler) { @@ -353,8 +368,9 @@ public class PsychParser extends RubyObject { IRubyObject anchor = stringOrNilForAnchor(context, se.getAnchor()); IRubyObject tag = stringOrNilFor(context, se.getTag()); - IRubyObject plain_implicit = runtime.newBoolean(se.getImplicit().canOmitTagInPlainScalar()); - IRubyObject quoted_implicit = runtime.newBoolean(se.getImplicit().canOmitTagInNonPlainScalar()); + ImplicitTuple implicit = se.getImplicit(); + IRubyObject plain_implicit = runtime.newBoolean(implicit.canOmitTagInPlainScalar()); + IRubyObject quoted_implicit = runtime.newBoolean(implicit.canOmitTagInNonPlainScalar()); IRubyObject style = runtime.newFixnum(translateStyle(se.getScalarStyle())); IRubyObject val = stringFor(context, se.getValue()); @@ -373,21 +389,20 @@ public class PsychParser extends RubyObject { } private static void 
raiseParserException(ThreadContext context, ReaderException re, IRubyObject rbPath) { - Ruby runtime; + Ruby runtime = context.runtime; RubyClass se; IRubyObject exception; - runtime = context.runtime; - se = (RubyClass)runtime.getModule("Psych").getConstant("SyntaxError"); + se = (RubyClass) runtime.getModule("Psych").getConstant("SyntaxError"); exception = se.newInstance(context, new IRubyObject[] { rbPath, - runtime.newFixnum(0), - runtime.newFixnum(0), + RubyFixnum.zero(runtime), + RubyFixnum.zero(runtime), runtime.newFixnum(re.getPosition()), - (null == re.getName() ? runtime.getNil() : runtime.newString(re.getName())), - (null == re.toString() ? runtime.getNil() : runtime.newString(re.toString())) + (null == re.getName() ? context.nil : runtime.newString(re.getName())), + (null == re.toString() ? context.nil : runtime.newString(re.toString())) }, Block.NULL_BLOCK); @@ -395,12 +410,11 @@ public class PsychParser extends RubyObject { } private static void raiseParserException(ThreadContext context, MarkedYamlEngineException mye, IRubyObject rbPath) { - Ruby runtime; + Ruby runtime = context.runtime; Mark mark; RubyClass se; IRubyObject exception; - runtime = context.runtime; se = (RubyClass)runtime.getModule("Psych").getConstant("SyntaxError"); mark = mye.getProblemMark().get(); @@ -411,8 +425,8 @@ public class PsychParser extends RubyObject { runtime.newFixnum(mark.getLine() + 1), runtime.newFixnum(mark.getColumn() + 1), runtime.newFixnum(mark.getIndex()), - (null == mye.getProblem() ? runtime.getNil() : runtime.newString(mye.getProblem())), - (null == mye.getContext() ? runtime.getNil() : runtime.newString(mye.getContext())) + (null == mye.getProblem() ? context.nil : runtime.newString(mye.getProblem())), + (null == mye.getContext() ? 
context.nil : runtime.newString(mye.getContext())) }, Block.NULL_BLOCK); @@ -420,11 +434,10 @@ public class PsychParser extends RubyObject { } private static void raiseParserException(ThreadContext context, MalformedInputException mie, IRubyObject rbPath) { - Ruby runtime;; + Ruby runtime = context.runtime; RubyClass se; IRubyObject exception; - runtime = context.runtime; se = (RubyClass)runtime.getModule("Psych").getConstant("SyntaxError"); mie.getInputLength(); @@ -432,11 +445,11 @@ public class PsychParser extends RubyObject { exception = se.newInstance(context, arrayOf( rbPath, - runtime.newFixnum(-1), - runtime.newFixnum(-1), + RubyFixnum.minus_one(runtime), + RubyFixnum.minus_one(runtime), runtime.newFixnum(mie.getInputLength()), - runtime.getNil(), - runtime.getNil() + context.nil, + context.nil ), Block.NULL_BLOCK); @@ -471,6 +484,7 @@ public class PsychParser extends RubyObject { Event event = null; + Parser parser = this.parser; if (parser != null) { if (parser.hasNext()) { event = parser.peekEvent(); @@ -480,7 +494,7 @@ public class PsychParser extends RubyObject { } if (event == null) { - return ((RubyClass)context.runtime.getClassFromPath("Psych::Parser::Mark")).newInstance( + return ((RubyClass) runtime.getClassFromPath("Psych::Parser::Mark")).newInstance( context, RubyFixnum.zero(runtime), RubyFixnum.zero(runtime), @@ -491,7 +505,7 @@ public class PsychParser extends RubyObject { Mark mark = event.getStartMark().orElseThrow(RuntimeException::new); - return ((RubyClass)context.runtime.getClassFromPath("Psych::Parser::Mark")).newInstance( + return ((RubyClass) runtime.getClassFromPath("Psych::Parser::Mark")).newInstance( context, RubyFixnum.zero(runtime), runtime.newFixnum(mark.getLine()), -- cgit v1.2.1 From c662354eecbae162b9287b8b566b432421414f96 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 18 Jan 2023 23:14:37 -0600 Subject: Shorter path to encode strings in emitter --- ext/java/org/jruby/ext/psych/PsychEmitter.java | 5 ++++- 1 
file changed, 4 insertions(+), 1 deletion(-) (limited to 'ext') diff --git a/ext/java/org/jruby/ext/psych/PsychEmitter.java b/ext/java/org/jruby/ext/psych/PsychEmitter.java index 9180dcc..baaca4a 100644 --- a/ext/java/org/jruby/ext/psych/PsychEmitter.java +++ b/ext/java/org/jruby/ext/psych/PsychEmitter.java @@ -33,12 +33,14 @@ import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyBoolean; import org.jruby.RubyClass; +import org.jruby.RubyEncoding; import org.jruby.RubyModule; import org.jruby.RubyObject; import org.jruby.RubyString; import org.jruby.anno.JRubyMethod; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.util.ByteList; import org.jruby.util.IOOutputStream; import org.jruby.util.TypeConverter; import org.jruby.util.io.EncodingUtils; @@ -375,8 +377,9 @@ public class PsychEmitter extends RubyObject { TypeConverter.checkType(context, tag, stringClass); tagStr = (RubyString) tag; + ByteList bytes = tagStr.getByteList(); - return EncodingUtils.strConvEnc(context, tagStr, tagStr.getEncoding(), UTF8Encoding.INSTANCE).asJavaString(); + return RubyEncoding.decodeUTF8(bytes.unsafeBytes(), bytes.begin(), bytes.realSize()); } Emitter emitter; -- cgit v1.2.1 From 4bf6dac18021e1928984fe1533a88d5a65860405 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 18 Jan 2023 23:23:07 -0600 Subject: Minor tweaks and cleanup in emitter --- ext/java/org/jruby/ext/psych/PsychEmitter.java | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'ext') diff --git a/ext/java/org/jruby/ext/psych/PsychEmitter.java b/ext/java/org/jruby/ext/psych/PsychEmitter.java index baaca4a..4df7989 100644 --- a/ext/java/org/jruby/ext/psych/PsychEmitter.java +++ b/ext/java/org/jruby/ext/psych/PsychEmitter.java @@ -118,17 +118,14 @@ public class PsychEmitter extends RubyObject { initEmitter(context, encoding); - StreamStartEvent event = new StreamStartEvent(NULL_MARK, NULL_MARK); - - 
emit(context, event); + emit(context, NULL_STREAM_START_EVENT); return this; } @JRubyMethod public IRubyObject end_stream(ThreadContext context) { - StreamEndEvent event = new StreamEndEvent(NULL_MARK, NULL_MARK); - emit(context, event); + emit(context, NULL_STREAM_START_EVENT); return this; } @@ -368,16 +365,16 @@ public class PsychEmitter extends RubyObject { return dumpSettingsBuilder.build(); } - private String exportToUTF8(ThreadContext context, IRubyObject tag, RubyClass stringClass) { - if (tag.isNil()) { + private String exportToUTF8(ThreadContext context, IRubyObject maybeString, RubyClass stringClass) { + if (maybeString.isNil()) { return null; } - RubyString tagStr; + RubyString string; - TypeConverter.checkType(context, tag, stringClass); - tagStr = (RubyString) tag; - ByteList bytes = tagStr.getByteList(); + TypeConverter.checkType(context, maybeString, stringClass); + string = (RubyString) maybeString; + ByteList bytes = string.getByteList(); return RubyEncoding.decodeUTF8(bytes.unsafeBytes(), bytes.begin(), bytes.realSize()); } @@ -388,6 +385,7 @@ public class PsychEmitter extends RubyObject { IRubyObject io; private static final Optional NULL_MARK = Optional.empty(); + private static final StreamStartEvent NULL_STREAM_START_EVENT = new StreamStartEvent(NULL_MARK, NULL_MARK); // Map style constants from Psych values (ANY = 0 ... FOLDED = 5) // to SnakeYaml values; see psych/nodes/scalar.rb. -- cgit v1.2.1