summaryrefslogtreecommitdiff
path: root/rsvg_convert/tests/internal_predicates/pdf.rs
diff options
context:
space:
mode:
Diffstat (limited to 'rsvg_convert/tests/internal_predicates/pdf.rs')
-rw-r--r--rsvg_convert/tests/internal_predicates/pdf.rs358
1 files changed, 358 insertions, 0 deletions
diff --git a/rsvg_convert/tests/internal_predicates/pdf.rs b/rsvg_convert/tests/internal_predicates/pdf.rs
new file mode 100644
index 00000000..f7872d71
--- /dev/null
+++ b/rsvg_convert/tests/internal_predicates/pdf.rs
@@ -0,0 +1,358 @@
+use chrono::{DateTime, Utc};
+use float_cmp::approx_eq;
+use lopdf::{self, Dictionary, Object};
+use predicates::prelude::*;
+use predicates::reflection::{Case, Child, PredicateReflection, Product};
+use std::cmp;
+use std::fmt;
+
+/// Checks that the variable of type [u8] can be parsed as a PDF file.
+#[derive(Debug)]
+pub struct PdfPredicate {}
+
+impl PdfPredicate {
+ pub fn with_page_count(self: Self, num_pages: usize) -> DetailPredicate<Self> {
+ DetailPredicate::<Self> {
+ p: self,
+ d: Detail::PageCount(num_pages),
+ }
+ }
+
+ pub fn with_page_size(
+ self: Self,
+ idx: usize,
+ width_in_points: f32,
+ height_in_points: f32,
+ ) -> DetailPredicate<Self> {
+ DetailPredicate::<Self> {
+ p: self,
+ d: Detail::PageSize(
+ Dimensions {
+ w: width_in_points,
+ h: height_in_points,
+ unit: 1.0,
+ },
+ idx,
+ ),
+ }
+ }
+
+ pub fn with_creation_date(self: Self, when: DateTime<Utc>) -> DetailPredicate<Self> {
+ DetailPredicate::<Self> {
+ p: self,
+ d: Detail::CreationDate(when),
+ }
+ }
+
+ pub fn with_link(self: Self, link: &str) -> DetailPredicate<Self> {
+ DetailPredicate::<Self> {
+ p: self,
+ d: Detail::Link(link.to_string()),
+ }
+ }
+
+ pub fn with_text(self: Self, text: &str) -> DetailPredicate<Self> {
+ DetailPredicate::<Self> {
+ p: self,
+ d: Detail::Text(text.to_string()),
+ }
+ }
+}
+
+impl Predicate<[u8]> for PdfPredicate {
+ fn eval(&self, data: &[u8]) -> bool {
+ lopdf::Document::load_mem(data).is_ok()
+ }
+
+ fn find_case<'a>(&'a self, _expected: bool, data: &[u8]) -> Option<Case<'a>> {
+ match lopdf::Document::load_mem(data) {
+ Ok(_) => None,
+ Err(e) => Some(Case::new(Some(self), false).add_product(Product::new("Error", e))),
+ }
+ }
+}
+
+impl PredicateReflection for PdfPredicate {}
+
+impl fmt::Display for PdfPredicate {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "is a PDF")
+ }
+}
+
+/// Extends a PdfPredicate by a check for page count, page size or creation date.
+#[derive(Debug)]
+pub struct DetailPredicate<PdfPredicate> {
+ p: PdfPredicate,
+ d: Detail,
+}
+
+#[derive(Debug)]
+enum Detail {
+ PageCount(usize),
+ PageSize(Dimensions, usize),
+ CreationDate(DateTime<Utc>),
+ Link(String),
+ Text(String),
+}
+
+/// A PDF page's dimensions from its `MediaBox`.
+///
+/// Note that `w` and `h` given in `UserUnit`, which is by default 1.0 = 1/72 inch.
+#[derive(Debug)]
+struct Dimensions {
+ w: f32,
+ h: f32,
+ unit: f32, // UserUnit, in points (1/72 of an inch)
+}
+
+impl Dimensions {
+ pub fn from_media_box(obj: &lopdf::Object, unit: Option<f32>) -> lopdf::Result<Dimensions> {
+ let a = obj.as_array()?;
+ Ok(Dimensions {
+ w: a[2].as_float()?,
+ h: a[3].as_float()?,
+ unit: unit.unwrap_or(1.0),
+ })
+ }
+
+ pub fn width_in_pt(self: &Self) -> f32 {
+ self.w * self.unit
+ }
+
+ pub fn height_in_pt(self: &Self) -> f32 {
+ self.h * self.unit
+ }
+}
+
+impl fmt::Display for Dimensions {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{} pt x {} pt", self.width_in_pt(), self.height_in_pt())
+ }
+}
+
+impl cmp::PartialEq for Dimensions {
+ fn eq(&self, other: &Self) -> bool {
+ approx_eq!(
+ f32,
+ self.width_in_pt(),
+ other.width_in_pt(),
+ epsilon = 0.0001
+ ) && approx_eq!(
+ f32,
+ self.height_in_pt(),
+ other.height_in_pt(),
+ epsilon = 0.0001
+ )
+ }
+}
+
+impl cmp::Eq for Dimensions {}
+
+trait Details {
+ fn get_page_count(&self) -> usize;
+ fn get_page_size(&self, idx: usize) -> Option<Dimensions>;
+ fn get_creation_date(&self) -> Option<DateTime<Utc>>;
+ fn get_from_trailer<'a>(self: &'a Self, key: &[u8]) -> lopdf::Result<&'a lopdf::Object>;
+ fn get_from_page<'a>(
+ self: &'a Self,
+ idx: usize,
+ key: &[u8],
+ ) -> lopdf::Result<&'a lopdf::Object>;
+}
+
+impl DetailPredicate<PdfPredicate> {
+ fn eval_doc(&self, doc: &lopdf::Document) -> bool {
+ match &self.d {
+ Detail::PageCount(n) => doc.get_page_count() == *n,
+ Detail::PageSize(d, idx) => doc.get_page_size(*idx).map_or(false, |dim| dim == *d),
+ Detail::CreationDate(d) => doc.get_creation_date().map_or(false, |date| date == *d),
+ Detail::Link(link) => document_has_link(doc, &link),
+ Detail::Text(text) => document_has_text(doc, &text),
+ }
+ }
+
+ fn find_case_for_doc<'a>(&'a self, expected: bool, doc: &lopdf::Document) -> Option<Case<'a>> {
+ if self.eval_doc(doc) == expected {
+ let product = self.product_for_doc(doc);
+ Some(Case::new(Some(self), false).add_product(product))
+ } else {
+ None
+ }
+ }
+
+ fn product_for_doc(&self, doc: &lopdf::Document) -> Product {
+ match &self.d {
+ Detail::PageCount(_) => Product::new(
+ "actual page count",
+ format!("{} page(s)", doc.get_page_count()),
+ ),
+ Detail::PageSize(_, idx) => Product::new(
+ "actual page size",
+ match doc.get_page_size(*idx) {
+ Some(dim) => format!("{}", dim),
+ None => "None".to_string(),
+ },
+ ),
+ Detail::CreationDate(_) => Product::new(
+ "actual creation date",
+ format!("{:?}", doc.get_creation_date()),
+ ),
+ Detail::Link(_) => Product::new(
+ "actual link contents",
+ "FIXME: who knows, but it's not what we expected".to_string(),
+ ),
+ Detail::Text(_) => {
+ Product::new("actual text contents", doc.extract_text(&[1]).unwrap())
+ }
+ }
+ }
+}
+
+// Extensions to lopdf::Object; can be removed after lopdf 0.26
+trait ObjExt {
+ /// Get the object value as a float.
+ /// Unlike as_f32() this will also cast an Integer to a Real.
+ fn as_float(&self) -> lopdf::Result<f32>;
+}
+
+impl ObjExt for lopdf::Object {
+ fn as_float(&self) -> lopdf::Result<f32> {
+ match *self {
+ lopdf::Object::Integer(ref value) => Ok(*value as f32),
+ lopdf::Object::Real(ref value) => Ok(*value),
+ _ => Err(lopdf::Error::Type),
+ }
+ }
+}
+
+impl Details for lopdf::Document {
+ fn get_page_count(self: &Self) -> usize {
+ self.get_pages().len()
+ }
+
+ fn get_page_size(self: &Self, idx: usize) -> Option<Dimensions> {
+ match self.get_from_page(idx, b"MediaBox") {
+ Ok(obj) => {
+ let unit = self
+ .get_from_page(idx, b"UserUnit")
+ .and_then(ObjExt::as_float)
+ .ok();
+ Dimensions::from_media_box(obj, unit).ok()
+ }
+ Err(_) => None,
+ }
+ }
+
+ fn get_creation_date(self: &Self) -> Option<DateTime<Utc>> {
+ match self.get_from_trailer(b"CreationDate") {
+ Ok(obj) => obj.as_datetime().map(|date| date.with_timezone(&Utc)),
+ Err(_) => None,
+ }
+ }
+
+ fn get_from_trailer<'a>(self: &'a Self, key: &[u8]) -> lopdf::Result<&'a lopdf::Object> {
+ let id = self.trailer.get(b"Info")?.as_reference()?;
+ self.get_object(id)?.as_dict()?.get(key)
+ }
+
+ fn get_from_page<'a>(
+ self: &'a Self,
+ idx: usize,
+ key: &[u8],
+ ) -> lopdf::Result<&'a lopdf::Object> {
+ let mut iter = self.page_iter();
+ for _ in 0..idx {
+ let _ = iter.next();
+ }
+ match iter.next() {
+ Some(id) => self.get_object(id)?.as_dict()?.get(key),
+ None => Err(lopdf::Error::ObjectNotFound),
+ }
+ }
+}
+
+impl Predicate<[u8]> for DetailPredicate<PdfPredicate> {
+ fn eval(&self, data: &[u8]) -> bool {
+ match lopdf::Document::load_mem(data) {
+ Ok(doc) => self.eval_doc(&doc),
+ _ => false,
+ }
+ }
+
+ fn find_case<'a>(&'a self, expected: bool, data: &[u8]) -> Option<Case<'a>> {
+ match lopdf::Document::load_mem(data) {
+ Ok(doc) => self.find_case_for_doc(expected, &doc),
+ Err(e) => Some(Case::new(Some(self), false).add_product(Product::new("Error", e))),
+ }
+ }
+}
+
+impl PredicateReflection for DetailPredicate<PdfPredicate> {
+ fn children<'a>(&'a self) -> Box<dyn Iterator<Item = Child<'a>> + 'a> {
+ let params = vec![Child::new("predicate", &self.p)];
+ Box::new(params.into_iter())
+ }
+}
+
+impl fmt::Display for DetailPredicate<PdfPredicate> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match &self.d {
+ Detail::PageCount(n) => write!(f, "is a PDF with {} page(s)", n),
+ Detail::PageSize(d, _) => write!(f, "is a PDF sized {}", d),
+ Detail::CreationDate(d) => write!(f, "is a PDF created {:?}", d),
+ Detail::Link(l) => write!(f, "is a PDF with a link to {}", l),
+ Detail::Text(t) => write!(f, "is a PDF with \"{}\" in its text content", t),
+ }
+ }
+}
+
+// This is an extremely trivial test for a string being present in the document's
+// text objects.
+fn document_has_text(document: &lopdf::Document, needle: &str) -> bool {
+ if let Ok(haystack) = text_from_first_page(document) {
+ haystack.contains(needle)
+ } else {
+ false
+ }
+}
+
+// We do a super simple test that a PDF actually contains an Annotation object
+// with a particular link. We don't test that this annotation is actually linked
+// from a page; that would be nicer.
+fn document_has_link(document: &lopdf::Document, link_text: &str) -> bool {
+ document
+ .objects
+ .iter()
+ .map(|(_obj_id, object)| object)
+ .any(|obj| object_is_annotation_with_link(obj, link_text))
+}
+
+fn object_is_annotation_with_link(object: &Object, link_text: &str) -> bool {
+ object
+ .as_dict()
+ .map(|dict| dict_is_annotation(dict) && dict_has_a_with_link(dict, link_text))
+ .unwrap_or(false)
+}
+
+fn dict_is_annotation(dict: &Dictionary) -> bool {
+ dict.get(b"Type")
+ .and_then(|type_val| type_val.as_name_str())
+ .map(|name| name == "Annot")
+ .unwrap_or(false)
+}
+
+fn dict_has_a_with_link(dict: &Dictionary, link_text: &str) -> bool {
+ dict.get(b"A")
+ .and_then(|obj| obj.as_dict())
+ .and_then(|dict| dict.get(b"URI"))
+ .and_then(|obj| obj.as_str())
+ .map(|string| string == link_text.as_bytes())
+ .unwrap_or(false)
+}
+
+fn text_from_first_page(doc: &lopdf::Document) -> lopdf::Result<String> {
+ // This is extremely simplistic; lopdf just concatenates all the text in the page
+ // into a single string.
+ doc.extract_text(&[1])
+}