poop graph data extractor prototype
This commit is contained in:
commit
7b22bbab16
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
/target
|
548
Cargo.lock
generated
Normal file
548
Cargo.lock
generated
Normal file
|
@ -0,0 +1,548 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "adler32"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aes"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "884391ef1066acaa41e766ba8f596341b96e93ce34f9a43e7d24bf0a0eaf0561"
|
||||||
|
dependencies = [
|
||||||
|
"aes-soft",
|
||||||
|
"aesni",
|
||||||
|
"cipher",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aes-soft"
|
||||||
|
version = "0.6.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "be14c7498ea50828a38d0e24a765ed2effe92a705885b57d029cd67d45744072"
|
||||||
|
dependencies = [
|
||||||
|
"cipher",
|
||||||
|
"opaque-debug",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aesni"
|
||||||
|
version = "0.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ea2e11f5e94c2f7d386164cc2aa1f97823fed6f259e486940a71c174dd01b0ce"
|
||||||
|
dependencies = [
|
||||||
|
"cipher",
|
||||||
|
"opaque-debug",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "0.7.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "autocfg"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "block-buffer"
|
||||||
|
version = "0.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
|
||||||
|
dependencies = [
|
||||||
|
"generic-array",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "block-modes"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "57a0e8073e8baa88212fb5823574c02ebccb395136ba9a164ab89379ec6072f0"
|
||||||
|
dependencies = [
|
||||||
|
"block-padding",
|
||||||
|
"cipher",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "block-padding"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8d696c370c750c948ada61c69a0ee2cbbb9c50b1019ddb86d9317157a99c2cae"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "byteorder"
|
||||||
|
version = "1.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "chrono"
|
||||||
|
version = "0.4.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"num-integer",
|
||||||
|
"num-traits",
|
||||||
|
"time 0.1.44",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cipher"
|
||||||
|
version = "0.2.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "12f8e7987cbd042a63249497f41aed09f8e65add917ea6566effbc56578d6801"
|
||||||
|
dependencies = [
|
||||||
|
"generic-array",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cpufeatures"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "deflate"
|
||||||
|
version = "0.9.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5f95bf05dffba6e6cce8dfbb30def788154949ccd9aed761b472119c21e01c70"
|
||||||
|
dependencies = [
|
||||||
|
"adler32",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "digest"
|
||||||
|
version = "0.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
|
||||||
|
dependencies = [
|
||||||
|
"generic-array",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "doc-comment"
|
||||||
|
version = "0.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fax"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "be28317220fbcf14cead021ae0a973a4433df68fd3c3a022bf8a4fb3bdc3ba8e"
|
||||||
|
dependencies = [
|
||||||
|
"fax_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fax_derive"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3c1d7ffc9f2dc8316348c75281a99c8fdc60c1ddf4f82a366d117bf1b74d5a39"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "generic-array"
|
||||||
|
version = "0.14.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803"
|
||||||
|
dependencies = [
|
||||||
|
"typenum",
|
||||||
|
"version_check",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "inflate"
|
||||||
|
version = "0.4.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1cdb29978cc5797bd8dcc8e5bf7de604891df2a8dc576973d71a281e916db2ff"
|
||||||
|
dependencies = [
|
||||||
|
"adler32",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itertools"
|
||||||
|
version = "0.10.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itoa"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "jpeg-decoder"
|
||||||
|
version = "0.1.22"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "229d53d58899083193af11e15917b5640cd40b29ff475a1fe4ef725deb02d0f2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazy_static"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.121"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "log"
|
||||||
|
version = "0.4.16"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "md5"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-integer"
|
||||||
|
version = "0.1.44"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"num-traits",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-traits"
|
||||||
|
version = "0.2.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_threads"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "aba1801fb138d8e85e11d0fc70baf4fe1cdfffda7c6cd34a854905df588e5ed0"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opaque-debug"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ordermap"
|
||||||
|
version = "0.4.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "91409674c628d07a6b4b79cc877c6b63ba5ccbfbadddd77ca822f55069ed1bd4"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pdf"
|
||||||
|
version = "0.7.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6f48644b2f4e9c3f3468d7c31fa75e9b823dfb167c8581ec15e90a90c34430d9"
|
||||||
|
dependencies = [
|
||||||
|
"aes",
|
||||||
|
"block-modes",
|
||||||
|
"byteorder",
|
||||||
|
"chrono",
|
||||||
|
"deflate",
|
||||||
|
"fax",
|
||||||
|
"glob",
|
||||||
|
"inflate",
|
||||||
|
"itertools",
|
||||||
|
"jpeg-decoder",
|
||||||
|
"log",
|
||||||
|
"md5",
|
||||||
|
"num-traits",
|
||||||
|
"once_cell",
|
||||||
|
"ordermap",
|
||||||
|
"pdf_derive",
|
||||||
|
"sha2",
|
||||||
|
"snafu",
|
||||||
|
"stringprep",
|
||||||
|
"weezl",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pdf_derive"
|
||||||
|
version = "0.1.22"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7f4007262775d0798de87b15cbc64cf1aed5f7ee87eec847e297b69d8ed4b4f8"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "poop-graph"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"lazy_static",
|
||||||
|
"pdf",
|
||||||
|
"regex",
|
||||||
|
"time 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.36"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-xid",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "632d02bff7f874a36f33ea8bb416cd484b90cc66c1194b1a1110d067a7013f58"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.6.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sha2"
|
||||||
|
version = "0.9.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800"
|
||||||
|
dependencies = [
|
||||||
|
"block-buffer",
|
||||||
|
"cfg-if",
|
||||||
|
"cpufeatures",
|
||||||
|
"digest",
|
||||||
|
"opaque-debug",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "snafu"
|
||||||
|
version = "0.6.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "eab12d3c261b2308b0d80c26fffb58d17eba81a4be97890101f416b478c79ca7"
|
||||||
|
dependencies = [
|
||||||
|
"doc-comment",
|
||||||
|
"snafu-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "snafu-derive"
|
||||||
|
version = "0.6.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "stringprep"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8ee348cb74b87454fff4b551cbf727025810a004f88aeacae7f85b87f4e9a1c1"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-bidi",
|
||||||
|
"unicode-normalization",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "1.0.90"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "704df27628939572cd88d33f171cd6f896f4eaca85252c6e0a72d8d8287ee86f"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-xid",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "time"
|
||||||
|
version = "0.1.44"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"wasi",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "time"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd"
|
||||||
|
dependencies = [
|
||||||
|
"itoa",
|
||||||
|
"libc",
|
||||||
|
"num_threads",
|
||||||
|
"time-macros",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "time-macros"
|
||||||
|
version = "0.2.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tinyvec"
|
||||||
|
version = "1.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2c1c1d5a42b6245520c249549ec267180beaffcc0615401ac8e31853d4b6d8d2"
|
||||||
|
dependencies = [
|
||||||
|
"tinyvec_macros",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tinyvec_macros"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typenum"
|
||||||
|
version = "1.15.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-bidi"
|
||||||
|
version = "0.3.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-normalization"
|
||||||
|
version = "0.1.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9"
|
||||||
|
dependencies = [
|
||||||
|
"tinyvec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-xid"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.9.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasi"
|
||||||
|
version = "0.10.0+wasi-snapshot-preview1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "weezl"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d8b77fdfd5a253be4ab714e4ffa3c49caf146b4de743e97510c0656cf90f1e8e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
12
Cargo.toml
Normal file
12
Cargo.toml
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
[package]
|
||||||
|
name = "poop-graph"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
lazy_static = "1.4"
|
||||||
|
pdf = "0.7.2"
|
||||||
|
regex = "1.5.5"
|
||||||
|
time = { version = "0.3.9", features = ["formatting", "macros", "parsing"] }
|
190
src/main.rs
Normal file
190
src/main.rs
Normal file
|
@ -0,0 +1,190 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use pdf::{primitive::Primitive, content::Operation, backend::Backend};
|
||||||
|
use regex::Regex;
|
||||||
|
use time::Date;
|
||||||
|
|
||||||
|
const POSITION_ERROR_MARGIN: f32 = 2.0;
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref DATE_REGEX: Regex = Regex::new(r"^\d{1,2}/\d{1,2}/\d{4}$").unwrap();
|
||||||
|
static ref VALUE_REGEX: Regex = Regex::new(r"^\d+$").unwrap();
|
||||||
|
static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
const DATE_PARSE_FORMAT: &[time::format_description::FormatItem] = time::macros::format_description!(
|
||||||
|
"[month padding:none]/[day padding:none]/[year]"
|
||||||
|
);
|
||||||
|
const DATE_DISPLAY_FORMAT: &[time::format_description::FormatItem] = time::macros::format_description!(
|
||||||
|
"[year]-[month]-[day]"
|
||||||
|
);
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct DataPoint<'a> {
|
||||||
|
date: Date,
|
||||||
|
values: HashMap<&'a str, u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> DataPoint<'a> {
|
||||||
|
fn new(text: &DocumentText) -> Result<Self, time::error::Parse> {
|
||||||
|
Ok(Self { date: Date::parse(&text.text, DATE_PARSE_FORMAT)?, values: HashMap::new() })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct DocumentText {
|
||||||
|
x: f32,
|
||||||
|
y: f32,
|
||||||
|
text: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct DocumentIterator<'a> {
|
||||||
|
x: f32,
|
||||||
|
y: f32,
|
||||||
|
operations: Box<dyn Iterator<Item = Operation> + 'a>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> DocumentIterator<'a> {
|
||||||
|
fn new<B: Backend>(document: &'a pdf::file::File<B>) -> Self {
|
||||||
|
Self {
|
||||||
|
x: 0.0,
|
||||||
|
y: 0.0,
|
||||||
|
operations: Box::new(
|
||||||
|
document
|
||||||
|
.pages()
|
||||||
|
.filter_map(|page| {
|
||||||
|
Some(page.ok()?.contents.clone()?.operations.into_iter())
|
||||||
|
})
|
||||||
|
.flatten()
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for DocumentIterator<'a> {
|
||||||
|
type Item = DocumentText;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
for Operation { operator, operands } in self.operations.as_mut() {
|
||||||
|
if operator == "Tm" {
|
||||||
|
if let (Some(x), Some(y)) = (
|
||||||
|
operands.get(4).and_then(extract_number),
|
||||||
|
operands.get(5).and_then(extract_number),
|
||||||
|
) {
|
||||||
|
self.x = x;
|
||||||
|
self.y = y;
|
||||||
|
}
|
||||||
|
} else if operator == "TJ" || operator == "Tj" {
|
||||||
|
if let Some(text) = operands.get(0).and_then(extract_string) {
|
||||||
|
return Some(DocumentText {
|
||||||
|
x: self.x,
|
||||||
|
y: self.y,
|
||||||
|
text,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_number(p: &Primitive) -> Option<f32> {
|
||||||
|
match p {
|
||||||
|
Primitive::Number(n) => Some(*n),
|
||||||
|
Primitive::Integer(n) => Some(*n as f32),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_string(p: &Primitive) -> Option<String> {
|
||||||
|
let result: Box<dyn AsRef<str>> = match p {
|
||||||
|
Primitive::Array(array) => {
|
||||||
|
let mut acc = String::new();
|
||||||
|
for element in array.iter() {
|
||||||
|
if let Primitive::String(s) = element {
|
||||||
|
acc += s.as_str().ok()?.as_ref();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Box::new(acc)
|
||||||
|
}
|
||||||
|
Primitive::String(s) => Box::new(s.as_str().ok()?),
|
||||||
|
_ => return None
|
||||||
|
};
|
||||||
|
Some(WHITESPACE_REGEX.replace_all((*result).as_ref().trim(), " ").to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_new_column_header(s: &str) -> bool {
|
||||||
|
s.starts_with("Northern") || s.starts_with("Southern")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let doc = pdf::file::File::open("data.pdf").expect("Failed to read PDF");
|
||||||
|
let mut doc_iter = DocumentIterator::new(&doc).peekable();
|
||||||
|
|
||||||
|
let mut columns: Vec<(String, f32)> = Vec::new();
|
||||||
|
let mut datapoints: Vec<DataPoint> = Vec::new();
|
||||||
|
|
||||||
|
let (mut current_datapoint, mut current_y) = loop {
|
||||||
|
if let Some(text) = doc_iter.next() {
|
||||||
|
if is_new_column_header(&text.text) {
|
||||||
|
let mut column_name = text.text;
|
||||||
|
let column_x = text.x;
|
||||||
|
while let Some(more) = doc_iter.peek() {
|
||||||
|
if is_new_column_header(&more.text) || DATE_REGEX.is_match(&more.text) {
|
||||||
|
columns.push((column_name, column_x));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
column_name += " ";
|
||||||
|
column_name += &more.text;
|
||||||
|
doc_iter.next();
|
||||||
|
}
|
||||||
|
} else if DATE_REGEX.is_match(&text.text) {
|
||||||
|
break (DataPoint::new(&text).expect("Failed to parse date!"), text.y);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
columns.sort_by(|(_, x0), (_, x1)| x0.partial_cmp(x1).unwrap_or(std::cmp::Ordering::Equal));
|
||||||
|
|
||||||
|
for text in doc_iter {
|
||||||
|
if DATE_REGEX.is_match(&text.text) {
|
||||||
|
datapoints.push(std::mem::replace(
|
||||||
|
&mut current_datapoint,
|
||||||
|
DataPoint::new(&text).expect("Failed to parse date!"),
|
||||||
|
));
|
||||||
|
current_y = text.y;
|
||||||
|
} else if VALUE_REGEX.is_match(&text.text) {
|
||||||
|
if (current_y - text.y).abs() > POSITION_ERROR_MARGIN {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if let Some((column, _)) = columns.iter().rev().find(|(_, x)| *x < text.x) {
|
||||||
|
current_datapoint.values.insert(column, text.text.parse().expect("Failed to parse value!"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print!("Date");
|
||||||
|
for (column, _) in columns.iter() {
|
||||||
|
print!(",{}", column);
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
for datapoint in datapoints.iter() {
|
||||||
|
print!(
|
||||||
|
"{}",
|
||||||
|
datapoint
|
||||||
|
.date
|
||||||
|
.format(DATE_DISPLAY_FORMAT)
|
||||||
|
.expect("Failed to format date!")
|
||||||
|
);
|
||||||
|
for (column, _) in columns.iter() {
|
||||||
|
if let Some(val) = datapoint.values.get(&column.as_ref()) {
|
||||||
|
print!(",{}", val);
|
||||||
|
} else {
|
||||||
|
print!(",");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue