summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
author: Federico Mena Quintero <federico@gnome.org> 2022-03-08 16:45:57 -0600
committer: Federico Mena Quintero <federico@gnome.org> 2022-03-08 19:37:44 -0600
commit: 8dc81944708e92aff573f9badb708d7de421b721 (patch)
tree: 42daa5d3714e23f3a138c512d540a561c504214f /tests
parent: 874c16ec8f46822a0a7d7cfdbdceb65bdb37b452 (diff)
download: librsvg-8dc81944708e92aff573f9badb708d7de421b721.tar.gz
PdfPredicate.with_text() - simple way to test for a PDF containing some textual content
This is *really* basic; lopdf's Document::extract_text() simply concatenates all the text strings in a page and returns them, so the predicate just does String::contains(). If we ever need something more sophisticated, we can walk the PDF structure manually with lopdf (gulp) and extract individual commands for text.

Part-of: <https://gitlab.gnome.org/GNOME/librsvg/-/merge_requests/673>
Diffstat (limited to 'tests')
-rw-r--r--tests/src/predicates/pdf.rs29
1 files changed, 29 insertions, 0 deletions
diff --git a/tests/src/predicates/pdf.rs b/tests/src/predicates/pdf.rs
index 862e64bb..62a3da4b 100644
--- a/tests/src/predicates/pdf.rs
+++ b/tests/src/predicates/pdf.rs
@@ -50,6 +50,13 @@ impl PdfPredicate {
d: Detail::Link(link.to_string()),
}
}
+
+    /// Refines this predicate so that it also requires `text` to be present
+    /// in the PDF's text content.
+    ///
+    /// Consumes `self` and returns a `DetailPredicate` that wraps it together
+    /// with a `Detail::Text` holding an owned copy of `text`.
+    pub fn with_text(self, text: &str) -> DetailPredicate<Self> {
+        let d = Detail::Text(String::from(text));
+        DetailPredicate { p: self, d }
+    }
}
impl Predicate<[u8]> for PdfPredicate {
@@ -86,6 +93,7 @@ enum Detail {
PageSize(Dimensions, usize),
CreationDate(DateTime<Utc>),
Link(String),
+ Text(String),
}
/// A PDF page's dimensions from its `MediaBox`.
@@ -160,6 +168,7 @@ impl DetailPredicate<PdfPredicate> {
Detail::PageSize(d, idx) => doc.get_page_size(*idx).map_or(false, |dim| dim == *d),
Detail::CreationDate(d) => doc.get_creation_date().map_or(false, |date| date == *d),
Detail::Link(link) => document_has_link(doc, &link),
+ Detail::Text(text) => document_has_text(doc, &text),
}
}
@@ -193,6 +202,9 @@ impl DetailPredicate<PdfPredicate> {
"actual link contents",
"FIXME: who knows, but it's not what we expected".to_string(),
),
+ Detail::Text(_) => {
+ Product::new("actual text contents", doc.extract_text(&[1]).unwrap())
+ }
}
}
}
@@ -290,10 +302,21 @@ impl fmt::Display for DetailPredicate<PdfPredicate> {
Detail::PageSize(d, _) => write!(f, "is a PDF sized {}", d),
Detail::CreationDate(d) => write!(f, "is a PDF created {:?}", d),
Detail::Link(l) => write!(f, "is a PDF with a link to {}", l),
+ Detail::Text(t) => write!(f, "is a PDF with \"{}\" in its text content", t),
}
}
}
+// Deliberately naive check: succeeds when `needle` occurs as a substring of the
+// text that `text_from_first_page` returns for the document. If that extraction
+// fails, the document is treated as not containing the text.
+fn document_has_text(document: &lopdf::Document, needle: &str) -> bool {
+    text_from_first_page(document).map_or(false, |page_text| page_text.contains(needle))
+}
+
// We do a super simple test that a PDF actually contains an Annotation object
// with a particular link. We don't test that this annotation is actually linked
// from a page; that would be nicer.
@@ -327,3 +350,9 @@ fn dict_has_a_with_link(dict: &Dictionary, link_text: &str) -> bool {
.map(|string| string == link_text.as_bytes())
.unwrap_or(false)
}
+
+// Returns the text lopdf extracts for page number 1 of `doc`, passing through
+// lopdf's error if the extraction fails.
+fn text_from_first_page(doc: &lopdf::Document) -> lopdf::Result<String> {
+    // Nothing clever here: lopdf hands back all of the page's text joined into
+    // one single string.
+    let first_page: [u32; 1] = [1];
+    doc.extract_text(&first_page)
+}