diff options
author | happy <17792773+jml-happy@users.noreply.github.com> | 2021-05-20 15:06:10 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-20 16:06:10 -0400 |
commit | 0fe082680b998a3a85e2515ba375e685d8022688 (patch) | |
tree | f3e9a063312fe44a496c9ec1de74b39aeed4f83e | |
parent | b71df0db8729e2f24965bb15ced0d580390de99e (diff) | |
download | networkx-0fe082680b998a3a85e2515ba375e685d8022688.tar.gz |
Fixes read/write_gml with nan/inf attributes (#4497)
* handle nan/inf for gml files
* handle nan/inf for gml files
* ran black
* Better fix for nan/inf.
* add tests for special floats in gml.py
Co-authored-by: Dan Schult <dschult@colgate.edu>
-rw-r--r-- | networkx/readwrite/gml.py | 28 | ||||
-rw-r--r-- | networkx/readwrite/tests/test_gml.py | 103 |
2 files changed, 124 insertions, 7 deletions
diff --git a/networkx/readwrite/gml.py b/networkx/readwrite/gml.py index bc3ef710..d84f9260 100644 --- a/networkx/readwrite/gml.py +++ b/networkx/readwrite/gml.py @@ -288,7 +288,7 @@ def parse_gml_lines(lines, label, destringizer): patterns = [ r"[A-Za-z][0-9A-Za-z_]*\b", # keys # reals - r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[Ee][+-]?[0-9]+)?", + r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*|INF)(?:[Ee][+-]?[0-9]+)?", r"[+-]?[0-9]+", # ints r'".*?"', # strings r"\[", # dict start @@ -370,6 +370,15 @@ def parse_gml_lines(lines, label, destringizer): + " convertable ASCII value for node id or label" ) unexpected(curr_token, msg) + # Special handling for nan and infinity. Since the gml language + # defines unquoted strings as keys, the numeric and string branches + # are skipped and we end up in this special branch, so we need to + # convert the current token value to a float for NAN and plain INF. + # +/-INF are handled in the pattern for 'reals' in tokenize(). This + # allows labels and values to be nan or infinity, but not keys. + elif curr_token.value in {"NAN", "INF"}: + value = float(curr_token.value) + curr_token = next(tokens) else: # Otherwise error out unexpected(curr_token, "an int, float, string or '['") dct[key].append(value) @@ -680,12 +689,17 @@ def generate_gml(G, stringizer=None): yield indent + key + " " + str(value) elif isinstance(value, float): text = repr(value).upper() - # GML requires that a real literal contain a decimal point, but - # repr may not output a decimal point when the mantissa is - # integral and hence needs fixing. - epos = text.rfind("E") - if epos != -1 and text.find(".", 0, epos) == -1: - text = text[:epos] + "." + text[epos:] + # GML matches INF to keys, so prepend + to INF. Use repr(float(*)) + # instead of string literal to future proof against changes to repr. + if text == repr(float("inf")).upper(): + text = "+" + text + else: + # GML requires that a real literal contain a decimal point, but + # repr may not output a decimal point when the mantissa is + # integral and hence needs fixing. + epos = text.rfind("E") + if epos != -1 and text.find(".", 0, epos) == -1: + text = text[:epos] + "." + text[epos:] if key == "label": yield indent + key + ' "' + text + '"' else: diff --git a/networkx/readwrite/tests/test_gml.py b/networkx/readwrite/tests/test_gml.py index 7511c233..51c08c4f 100644 --- a/networkx/readwrite/tests/test_gml.py +++ b/networkx/readwrite/tests/test_gml.py @@ -2,6 +2,7 @@ from ast import literal_eval import codecs from contextlib import contextmanager import io +import math import pytest import networkx as nx from networkx.readwrite.gml import literal_stringizer, literal_destringizer @@ -287,6 +288,108 @@ graph ]""" assert data == answer + def test_float_label(self): + special_floats = [float("nan"), float("+inf"), float("-inf")] + try: + import numpy as np + + special_floats += [np.nan, np.inf, np.inf * -1] + except ImportError: + special_floats += special_floats + + G = nx.cycle_graph(len(special_floats)) + attrs = dict(enumerate(special_floats)) + nx.set_node_attributes(G, attrs, "nodefloat") + edges = list(G.edges) + attrs = {edges[i]: value for i, value in enumerate(special_floats)} + nx.set_edge_attributes(G, attrs, "edgefloat") + + fobj = tempfile.NamedTemporaryFile() + nx.write_gml(G, fobj) + fobj.seek(0) + # Should be bytes in 2.x and 3.x + data = fobj.read().strip().decode("ascii") + answer = """graph [ + node [ + id 0 + label "0" + nodefloat NAN + ] + node [ + id 1 + label "1" + nodefloat +INF + ] + node [ + id 2 + label "2" + nodefloat -INF + ] + node [ + id 3 + label "3" + nodefloat NAN + ] + node [ + id 4 + label "4" + nodefloat +INF + ] + node [ + id 5 + label "5" + nodefloat -INF + ] + edge [ + source 0 + target 1 + edgefloat NAN + ] + edge [ + source 0 + target 5 + edgefloat +INF + ] + edge [ + source 1 + target 2 + edgefloat -INF + ] + edge [ + source 2 + target 3 + edgefloat NAN + ] + edge [ + source 3 + target 4 + edgefloat +INF + ] + edge [ + source 4 + target 5 + edgefloat -INF + ] +]""" + assert data == answer + + fobj.seek(0) + graph = nx.read_gml(fobj) + for indx, value in enumerate(special_floats): + node_value = graph.nodes[str(indx)]["nodefloat"] + if math.isnan(value): + assert math.isnan(node_value) + else: + assert node_value == value + + edge = edges[indx] + string_edge = (str(edge[0]), str(edge[1])) + edge_value = graph.edges[string_edge]["edgefloat"] + if math.isnan(value): + assert math.isnan(edge_value) + else: + assert edge_value == value + def test_name(self): G = nx.parse_gml('graph [ name "x" node [ id 0 label "x" ] ]') assert "x" == G.graph["name"] |