summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: happy <17792773+jml-happy@users.noreply.github.com> 2021-05-20 15:06:10 -0500
committer: GitHub <noreply@github.com> 2021-05-20 16:06:10 -0400
commit: 0fe082680b998a3a85e2515ba375e685d8022688 (patch)
tree: f3e9a063312fe44a496c9ec1de74b39aeed4f83e
parent: b71df0db8729e2f24965bb15ced0d580390de99e (diff)
download: networkx-0fe082680b998a3a85e2515ba375e685d8022688.tar.gz
Fixes read/write_gml with nan/inf attributes (#4497)
* handle nan/inf for gml files
* handle nan/inf for gml files
* ran black
* Better fix for nan/inf.
* add tests for special floats in gml.py

Co-authored-by: Dan Schult <dschult@colgate.edu>
-rw-r--r--  networkx/readwrite/gml.py  28
-rw-r--r--  networkx/readwrite/tests/test_gml.py  103
2 files changed, 124 insertions, 7 deletions
diff --git a/networkx/readwrite/gml.py b/networkx/readwrite/gml.py
index bc3ef710..d84f9260 100644
--- a/networkx/readwrite/gml.py
+++ b/networkx/readwrite/gml.py
@@ -288,7 +288,7 @@ def parse_gml_lines(lines, label, destringizer):
patterns = [
r"[A-Za-z][0-9A-Za-z_]*\b", # keys
# reals
- r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[Ee][+-]?[0-9]+)?",
+ r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*|INF)(?:[Ee][+-]?[0-9]+)?",
r"[+-]?[0-9]+", # ints
r'".*?"', # strings
r"\[", # dict start
@@ -370,6 +370,15 @@ def parse_gml_lines(lines, label, destringizer):
+ " convertable ASCII value for node id or label"
)
unexpected(curr_token, msg)
+ # Special handling for nan and infinity. Since the gml language
+ # defines unquoted strings as keys, the numeric and string branches
+ # are skipped and we end up in this special branch, so we need to
+ # convert the current token value to a float for NAN and plain INF.
+ # +/-INF are handled in the pattern for 'reals' in tokenize(). This
+ # allows labels and values to be nan or infinity, but not keys.
+ elif curr_token.value in {"NAN", "INF"}:
+ value = float(curr_token.value)
+ curr_token = next(tokens)
else: # Otherwise error out
unexpected(curr_token, "an int, float, string or '['")
dct[key].append(value)
@@ -680,12 +689,17 @@ def generate_gml(G, stringizer=None):
yield indent + key + " " + str(value)
elif isinstance(value, float):
text = repr(value).upper()
- # GML requires that a real literal contain a decimal point, but
- # repr may not output a decimal point when the mantissa is
- # integral and hence needs fixing.
- epos = text.rfind("E")
- if epos != -1 and text.find(".", 0, epos) == -1:
- text = text[:epos] + "." + text[epos:]
+ # GML matches INF to keys, so prepend + to INF. Use repr(float(*))
+ # instead of string literal to future proof against changes to repr.
+ if text == repr(float("inf")).upper():
+ text = "+" + text
+ else:
+ # GML requires that a real literal contain a decimal point, but
+ # repr may not output a decimal point when the mantissa is
+ # integral and hence needs fixing.
+ epos = text.rfind("E")
+ if epos != -1 and text.find(".", 0, epos) == -1:
+ text = text[:epos] + "." + text[epos:]
if key == "label":
yield indent + key + ' "' + text + '"'
else:
diff --git a/networkx/readwrite/tests/test_gml.py b/networkx/readwrite/tests/test_gml.py
index 7511c233..51c08c4f 100644
--- a/networkx/readwrite/tests/test_gml.py
+++ b/networkx/readwrite/tests/test_gml.py
@@ -2,6 +2,7 @@ from ast import literal_eval
import codecs
from contextlib import contextmanager
import io
+import math
import pytest
import networkx as nx
from networkx.readwrite.gml import literal_stringizer, literal_destringizer
@@ -287,6 +288,108 @@ graph
]"""
assert data == answer
+ def test_float_label(self):
+ special_floats = [float("nan"), float("+inf"), float("-inf")]
+ try:
+ import numpy as np
+
+ special_floats += [np.nan, np.inf, np.inf * -1]
+ except ImportError:
+ special_floats += special_floats
+
+ G = nx.cycle_graph(len(special_floats))
+ attrs = dict(enumerate(special_floats))
+ nx.set_node_attributes(G, attrs, "nodefloat")
+ edges = list(G.edges)
+ attrs = {edges[i]: value for i, value in enumerate(special_floats)}
+ nx.set_edge_attributes(G, attrs, "edgefloat")
+
+ fobj = tempfile.NamedTemporaryFile()
+ nx.write_gml(G, fobj)
+ fobj.seek(0)
+ # Should be bytes in 2.x and 3.x
+ data = fobj.read().strip().decode("ascii")
+ answer = """graph [
+ node [
+ id 0
+ label "0"
+ nodefloat NAN
+ ]
+ node [
+ id 1
+ label "1"
+ nodefloat +INF
+ ]
+ node [
+ id 2
+ label "2"
+ nodefloat -INF
+ ]
+ node [
+ id 3
+ label "3"
+ nodefloat NAN
+ ]
+ node [
+ id 4
+ label "4"
+ nodefloat +INF
+ ]
+ node [
+ id 5
+ label "5"
+ nodefloat -INF
+ ]
+ edge [
+ source 0
+ target 1
+ edgefloat NAN
+ ]
+ edge [
+ source 0
+ target 5
+ edgefloat +INF
+ ]
+ edge [
+ source 1
+ target 2
+ edgefloat -INF
+ ]
+ edge [
+ source 2
+ target 3
+ edgefloat NAN
+ ]
+ edge [
+ source 3
+ target 4
+ edgefloat +INF
+ ]
+ edge [
+ source 4
+ target 5
+ edgefloat -INF
+ ]
+]"""
+ assert data == answer
+
+ fobj.seek(0)
+ graph = nx.read_gml(fobj)
+ for indx, value in enumerate(special_floats):
+ node_value = graph.nodes[str(indx)]["nodefloat"]
+ if math.isnan(value):
+ assert math.isnan(node_value)
+ else:
+ assert node_value == value
+
+ edge = edges[indx]
+ string_edge = (str(edge[0]), str(edge[1]))
+ edge_value = graph.edges[string_edge]["edgefloat"]
+ if math.isnan(value):
+ assert math.isnan(edge_value)
+ else:
+ assert edge_value == value
+
def test_name(self):
G = nx.parse_gml('graph [ name "x" node [ id 0 label "x" ] ]')
assert "x" == G.graph["name"]