summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam Thursfield <sam@afuera.me.uk>2016-04-07 17:30:56 +0100
committerSam Thursfield <sam@afuera.me.uk>2018-07-15 18:07:57 +0200
commitb458fda8fa03f5ec53f8de9edfeef1b7610aae9c (patch)
treed02ee6ca29d0b382aa2377968997be1e007cd8ab
parentc656e8adab19ec3b3d6ed29a182d265e8ace7208 (diff)
downloadtracker-sam/resource-jsonld.tar.gz
Add support to TrackerResource for outputting metadata as JSON-LDsam/resource-jsonld
This also adds `tracker extract -o json` to dump resources in JSON-LD, in anticipation of corresponding updates in tracker-miners.git.
-rw-r--r--configure.ac5
-rw-r--r--docs/manpages/tracker-extract.12
-rw-r--r--src/libtracker-common/tracker-enums.h6
-rw-r--r--src/libtracker-sparql/meson.build2
-rw-r--r--src/libtracker-sparql/tracker-namespace-manager.c20
-rw-r--r--src/libtracker-sparql/tracker-namespace-manager.h2
-rw-r--r--src/libtracker-sparql/tracker-resource.c175
-rw-r--r--src/libtracker-sparql/tracker-resource.h2
-rw-r--r--src/tracker/tracker-extract.c2
9 files changed, 211 insertions, 5 deletions
diff --git a/configure.ac b/configure.ac
index e9e291811..97fe99f19 100644
--- a/configure.ac
+++ b/configure.ac
@@ -296,8 +296,9 @@ LIBTRACKER_CONTROL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED
PKG_CHECK_MODULES(LIBTRACKER_CONTROL, [$LIBTRACKER_CONTROL_REQUIRED])
# Check requirements for libtracker-sparql
-LIBTRACKER_SPARQL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED
- gio-unix-2.0 >= $GLIB_REQUIRED
+LIBTRACKER_SPARQL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED
+ gio-unix-2.0 >= $GLIB_REQUIRED
+ json-glib-1.0 >= $JSON_GLIB_REQUIRED
uuid"
PKG_CHECK_MODULES(LIBTRACKER_SPARQL, [$LIBTRACKER_SPARQL_REQUIRED])
diff --git a/docs/manpages/tracker-extract.1 b/docs/manpages/tracker-extract.1
index 9b3503c7a..e482fe171 100644
--- a/docs/manpages/tracker-extract.1
+++ b/docs/manpages/tracker-extract.1
@@ -71,7 +71,7 @@ The possible \fILEVEL\fR options are:
.TP
.B \-o, \-\-output-format\fR=<\fIFORMAT\fR>
Choose which format to use to output results. Supported formats are
-\fIsparql\fR and \fIturtle\fR.
+\fIsparql\fR, \fIturtle\fR and \fIjson-ld\fR.
.SH EXAMPLES
.TP
diff --git a/src/libtracker-common/tracker-enums.h b/src/libtracker-common/tracker-enums.h
index 2be97c174..f3e2bbd53 100644
--- a/src/libtracker-common/tracker-enums.h
+++ b/src/libtracker-common/tracker-enums.h
@@ -38,6 +38,12 @@ typedef enum {
typedef enum {
TRACKER_SERIALIZATION_FORMAT_SPARQL,
TRACKER_SERIALIZATION_FORMAT_TURTLE,
+ /* JSON and JSON_LD are treated as the same thing right now, but we could
+ * treat them differently if we wanted. Also, it's nice to be able to pass
+ * both 'json' and 'json-ld' to `tracker extract --output-format=`.
+ */
+ TRACKER_SERIALIZATION_FORMAT_JSON,
+ TRACKER_SERIALIZATION_FORMAT_JSON_LD,
} TrackerSerializationFormat;
G_END_DECLS
diff --git a/src/libtracker-sparql/meson.build b/src/libtracker-sparql/meson.build
index fa7d45f72..90e10ec04 100644
--- a/src/libtracker-sparql/meson.build
+++ b/src/libtracker-sparql/meson.build
@@ -74,7 +74,7 @@ libtracker_sparql_c_public_headers = files(
libtracker_sparql_intermediate_c = static_library('tracker-sparql-intermediate-c',
enums_c, enums_h,
libtracker_sparql_c_sources,
- dependencies: [tracker_common_dep],
+ dependencies: [tracker_common_dep, json_glib],
)
tracker_sparql_intermediate_dep = declare_dependency(
diff --git a/src/libtracker-sparql/tracker-namespace-manager.c b/src/libtracker-sparql/tracker-namespace-manager.c
index 15123bd08..5ce4cfa4d 100644
--- a/src/libtracker-sparql/tracker-namespace-manager.c
+++ b/src/libtracker-sparql/tracker-namespace-manager.c
@@ -331,3 +331,23 @@ tracker_namespace_manager_print_turtle (TrackerNamespaceManager *self)
return g_string_free (result, FALSE);
}
+
+/**
+ * tracker_namespace_manager_foreach:
+ * @self: a #TrackerNamespaceManager
+ * @func: the function to call for each prefix / URI pair
+ * @user_data: user data to pass to the function
+ *
+ * Calls @func for each known prefix / URI pair.
+ *
+ * The pairs are visited with g_hash_table_foreach() on the prefix-to-namespace
+ * table, so @func receives the prefix as its key argument and the matching
+ * URI as its value argument, in no particular order. @func must not add or
+ * remove prefixes on @self while the iteration is running.
+ *
+ * Since: 2.0.5
+ */
+void
+tracker_namespace_manager_foreach (TrackerNamespaceManager *self,
+                                   GHFunc                   func,
+                                   gpointer                 user_data)
+{
+	TrackerNamespaceManagerPrivate *priv = GET_PRIVATE (self);
+
+	g_hash_table_foreach (priv->prefix_to_namespace, func, user_data);
+}
diff --git a/src/libtracker-sparql/tracker-namespace-manager.h b/src/libtracker-sparql/tracker-namespace-manager.h
index 2a6e45e9d..0f539d41a 100644
--- a/src/libtracker-sparql/tracker-namespace-manager.h
+++ b/src/libtracker-sparql/tracker-namespace-manager.h
@@ -43,6 +43,8 @@ void tracker_namespace_manager_add_prefix (TrackerNamespaceManager *self, const
char *tracker_namespace_manager_print_turtle (TrackerNamespaceManager *self);
+void tracker_namespace_manager_foreach (TrackerNamespaceManager *self, GHFunc func, gpointer user_data);
+
G_END_DECLS
#endif /* __LIBTRACKER_SPARQL_NAMESPACE_MANAGER_H__ */
diff --git a/src/libtracker-sparql/tracker-resource.c b/src/libtracker-sparql/tracker-resource.c
index 694f5ac87..2270900fc 100644
--- a/src/libtracker-sparql/tracker-resource.c
+++ b/src/libtracker-sparql/tracker-resource.c
@@ -18,6 +18,7 @@
*/
#include <glib.h>
+#include <json-glib/json-glib.h>
#include <string.h>
@@ -1542,3 +1543,177 @@ tracker_resource_print_sparql_update (TrackerResource *resource,
return g_string_free (context.string, FALSE);
}
+
+/* State shared by the helpers that serialise a TrackerResource as JSON-LD. */
+typedef struct {
+ /* all_namespaces: the namespace set chosen by the caller (or the default
+ * set). our_namespaces: a manager created for this run; its entries are
+ * written out as the "@context" object.
+ * NOTE(review): presumably maybe_intern_prefix_of_compact_uri() copies a
+ * prefix from all_namespaces into our_namespaces when it is used - confirm
+ * against that helper.
+ */
+ TrackerNamespaceManager *all_namespaces, *our_namespaces;
+ /* Builder that receives the JSON output. */
+ JsonBuilder *builder;
+ /* Resources that have already been written as nested objects. */
+ GList *done_list;
+} GenerateJsonldData;
+
+static void generate_jsonld_foreach (gpointer key, gpointer value_ptr, gpointer user_data);
+
+/*
+ * Writes the members of @self into the JSON object that is currently open on
+ * data->builder: an "@id" member (omitted for blank nodes) followed by one
+ * member per entry of the resource's property table. Opening and closing the
+ * enclosing object is left to the caller.
+ */
+static void
+tracker_resource_generate_jsonld (TrackerResource    *self,
+                                  GenerateJsonldData *data)
+{
+	TrackerResourcePrivate *priv = GET_PRIVATE (self);
+	JsonBuilder *builder = data->builder;
+
+	/* The JSON-LD spec says it is "important that nodes have an identifier", but
+	 * doesn't mandate one. I think it's better to omit the ID for blank nodes
+	 * (where the caller passed NULL as an identifier) than to emit something
+	 * SPARQL-specific like '_:123'.
+	 */
+	if (strncmp (priv->identifier, "_:", 2) != 0) {
+		json_builder_set_member_name (builder, "@id");
+		json_builder_add_string_value (builder, priv->identifier);
+	}
+
+	g_hash_table_foreach (priv->properties, generate_jsonld_foreach, data);
+}
+
+/*
+ * Appends one property value to data->builder.
+ *
+ * A nested resource is written as a JSON object the first time it is met and
+ * as its plain identifier string on every later encounter. URIs are written
+ * as strings; any other value is handed to json_node_set_value().
+ *
+ * The (value, user data) argument order also lets this function be used as a
+ * GFunc when walking the array of a multi-valued property.
+ */
+static void
+generate_jsonld_value (const GValue       *value,
+                       GenerateJsonldData *data)
+{
+	JsonNode *node;
+
+	if (G_VALUE_HOLDS (value, TRACKER_TYPE_RESOURCE)) {
+		TrackerResource *resource;
+
+		resource = TRACKER_RESOURCE (g_value_get_object (value));
+
+		if (g_list_find_custom (data->done_list, resource, (GCompareFunc) tracker_resource_compare) == NULL) {
+			/* Record the resource as done *before* descending into it.
+			 * If it were recorded afterwards, a resource that refers back
+			 * to itself (directly or through other resources) would never
+			 * be found in done_list while it is being written, and the
+			 * recursion would never end.
+			 */
+			data->done_list = g_list_prepend (data->done_list, resource);
+
+			json_builder_begin_object (data->builder);
+
+			tracker_resource_generate_jsonld (resource, data);
+
+			json_builder_end_object (data->builder);
+		} else {
+			json_builder_add_string_value (data->builder, tracker_resource_get_identifier (resource));
+		}
+	} else if (G_VALUE_HOLDS (value, TRACKER_TYPE_URI)) {
+		/* URIs can be treated the same as strings in JSON-LD provided the @context
+		 * sets the type of that property correctly. However, json_node_set_value()
+		 * will reject a GValue holding TRACKER_TYPE_URI, so we have to extract the
+		 * string manually here.
+		 */
+		const char *uri = g_value_get_string (value);
+		maybe_intern_prefix_of_compact_uri (data->all_namespaces, data->our_namespaces, uri);
+		node = json_node_new (JSON_NODE_VALUE);
+		json_node_set_string (node, uri);
+		json_builder_add_value (data->builder, node);
+	} else {
+		node = json_node_new (JSON_NODE_VALUE);
+		json_node_set_value (node, value);
+		json_builder_add_value (data->builder, node);
+	}
+}
+
+/*
+ * GHFunc applied to each entry of a resource's property table.
+ *
+ * Emits the property name as a JSON member name ("rdf:type" is spelled
+ * "@type"; every other name is passed through
+ * maybe_intern_prefix_of_compact_uri() first), then emits the value: as a
+ * JSON array when the GValue holds a GPtrArray, as a single value otherwise.
+ */
+static void
+generate_jsonld_foreach (gpointer key,
+                         gpointer value_ptr,
+                         gpointer user_data)
+{
+	GenerateJsonldData *state = user_data;
+	const GValue *prop_value = value_ptr;
+	const char *member_name = key;
+
+	if (strcmp (member_name, "rdf:type") != 0) {
+		maybe_intern_prefix_of_compact_uri (state->all_namespaces, state->our_namespaces, member_name);
+	} else {
+		member_name = "@type";
+	}
+
+	json_builder_set_member_name (state->builder, member_name);
+
+	/* Single-valued property: emit it directly and stop. */
+	if (!G_VALUE_HOLDS (prop_value, G_TYPE_PTR_ARRAY)) {
+		generate_jsonld_value (prop_value, state);
+		return;
+	}
+
+	/* Multi-valued property: wrap the individual values in an array. */
+	json_builder_begin_array (state->builder);
+	g_ptr_array_foreach (g_value_get_boxed (prop_value), (GFunc) generate_jsonld_value, state);
+	json_builder_end_array (state->builder);
+}
+
+/*
+ * GHFunc used while the "@context" object is open: adds one member whose
+ * name is @key and whose string value is @value.
+ */
+static void
+generate_jsonld_namespace_mapping_foreach (gpointer key,
+                                           gpointer value,
+                                           gpointer user_data)
+{
+	JsonBuilder *builder = ((GenerateJsonldData *) user_data)->builder;
+	const char *member_name = key;
+	const char *member_value = value;
+
+	json_builder_set_member_name (builder, member_name);
+	json_builder_add_string_value (builder, member_value);
+}
+
+
+/**
+ * tracker_resource_print_jsonld:
+ * @self: a #TrackerResource
+ * @namespaces: (allow-none): a set of prefixed URLs, or %NULL to use the
+ *     default set
+ *
+ * Serialize all the information in @self as a JSON-LD document.
+ *
+ * See <https://json-ld.org/> for more information on the JSON-LD
+ * serialization format.
+ *
+ * The @namespaces object is used to expand any compact URI values. In most
+ * cases you should pass %NULL, which means the set of namespaces returned by
+ * tracker_namespace_manager_get_default() will be used. This defines the
+ * usual prefixes for all of the ontologies that Tracker ships with by default.
+ *
+ * The prefix mappings are written as the "@context" member of the top-level
+ * object, after the members of @self.
+ *
+ * Returns: (transfer full): a newly-allocated string containing JSON-LD data.
+ *     Free it with g_free() when it is no longer needed.
+ *
+ * Since: 2.0.5
+ */
+char *
+tracker_resource_print_jsonld (TrackerResource         *self,
+                               TrackerNamespaceManager *namespaces)
+{
+	GenerateJsonldData context;
+	JsonNode *json_root_node;
+	JsonGenerator *generator;
+	char *result;
+
+	if (namespaces == NULL) {
+		namespaces = tracker_namespace_manager_get_default ();
+	}
+
+	context.all_namespaces = namespaces;
+	context.our_namespaces = tracker_namespace_manager_new ();
+	context.builder = json_builder_new ();
+	/* The root counts as already written, so a nested reference back to it
+	 * is emitted as an identifier instead of serialising the root again
+	 * (which would recurse without end).
+	 */
+	context.done_list = g_list_prepend (NULL, self);
+
+	maybe_intern_prefix_of_compact_uri (context.all_namespaces, context.our_namespaces, tracker_resource_get_identifier (self));
+
+	json_builder_begin_object (context.builder);
+
+	tracker_resource_generate_jsonld (self, &context);
+
+	/* "@context" is written last: our_namespaces is only complete once the
+	 * whole resource tree has been walked.
+	 */
+	json_builder_set_member_name (context.builder, "@context");
+	json_builder_begin_object (context.builder);
+	tracker_namespace_manager_foreach (context.our_namespaces, generate_jsonld_namespace_mapping_foreach, &context);
+	json_builder_end_object (context.builder);
+
+	json_builder_end_object (context.builder);
+
+	json_root_node = json_builder_get_root (context.builder);
+
+	generator = json_generator_new ();
+	json_generator_set_root (generator, json_root_node);
+	json_generator_set_pretty (generator, TRUE);
+
+	result = json_generator_to_data (generator, NULL);
+
+	g_list_free (context.done_list);
+	json_node_free (json_root_node);
+	g_object_unref (context.builder);
+	g_object_unref (generator);
+	/* our_namespaces was created above and is owned by this function. */
+	g_object_unref (context.our_namespaces);
+
+	return result;
+}
diff --git a/src/libtracker-sparql/tracker-resource.h b/src/libtracker-sparql/tracker-resource.h
index 2b4e2d72f..89e765a4a 100644
--- a/src/libtracker-sparql/tracker-resource.h
+++ b/src/libtracker-sparql/tracker-resource.h
@@ -74,6 +74,8 @@ char *tracker_resource_print_turtle(TrackerResource *self, TrackerNamespaceManag
char *tracker_resource_print_sparql_update (TrackerResource *self, TrackerNamespaceManager *namespaces, const char *graph_id);
+char *tracker_resource_print_jsonld (TrackerResource *self, TrackerNamespaceManager *namespaces);
+
G_END_DECLS
#endif /* __LIBTRACKER_RESOURCE_H__ */
diff --git a/src/tracker/tracker-extract.c b/src/tracker/tracker-extract.c
index 9b5e737d8..040fc059b 100644
--- a/src/tracker/tracker-extract.c
+++ b/src/tracker/tracker-extract.c
@@ -42,7 +42,7 @@ static GOptionEntry entries[] = {
N_("Sets the logging verbosity to LEVEL (“debug”, “detailed”, “minimal”, “errors”) for all processes"),
N_("LEVEL") },
{ "output-format", 'o', 0, G_OPTION_ARG_STRING, &output_format,
- N_("Output results format: “sparql”, or “turtle”"),
+ N_("Output results format: 'sparql', 'turtle' or 'json-ld'"),
N_("FORMAT") },
{ G_OPTION_REMAINING, 0, 0, G_OPTION_ARG_FILENAME_ARRAY, &filenames,
N_("FILE"),