From f295f1e17a21be35b165d81a1fc72a52c6dab362 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Wed, 15 Oct 2025 23:21:34 +0200 Subject: [PATCH 1/8] draft --- CMakeLists.txt | 13 ++- conan.lock | 121 +++++++++++++------------ conanfile.py | 5 + src/odr/global_params.cpp | 11 ++- src/odr/global_params.hpp | 3 + src/odr/internal/libmagic/libmagic.cpp | 48 ++++++++++ src/odr/internal/libmagic/libmagic.hpp | 7 ++ src/odr/internal/magic.cpp | 9 ++ src/odr/internal/magic.hpp | 4 +- src/odr/internal/project_info.cpp.in | 12 +++ src/odr/internal/project_info.hpp | 3 + 11 files changed, 170 insertions(+), 66 deletions(-) create mode 100644 src/odr/internal/libmagic/libmagic.cpp create mode 100644 src/odr/internal/libmagic/libmagic.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 413706393..7a39152f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,6 @@ option(ODR_CLI "enable command line interface" ON) option(ODR_CLANG_TIDY "Run clang-tidy static analysis" OFF) option(WITH_PDF2HTMLEX "Build with pdf2htmlEX" ON) option(WITH_WVWARE "Build with wvWare" ON) -option(WITH_CUSTOM_TMPFILE "Build with custom temporary file implementation" OFF) # TODO defining global compiler flags seems to be bad practice with conan # TODO consider using conan profiles @@ -245,15 +244,19 @@ if (WITH_WVWARE) ODR_WITH_WVWARE ) endif () -if (WITH_CUSTOM_TMPFILE) - find_package(tmpfile REQUIRED CONFIG) +if (WITH_LIBMAGIC) + find_package(libmagic REQUIRED) + target_sources(odr + PRIVATE + "src/odr/internal/libmagic/libmagic.cpp" + ) target_link_libraries(odr PRIVATE - tmpfile::tmpfile + libmagic::libmagic ) target_compile_definitions(odr PRIVATE - ODR_WITH_CUSTOM_TMPFILE + ODR_WITH_LIBMAGIC ) endif () diff --git a/conan.lock b/conan.lock index 6632e1ec8..ac2a66b8e 100644 --- a/conan.lock +++ b/conan.lock @@ -1,76 +1,79 @@ { "version": "0.5", "requires": [ - "zlib/1.3.1#b8bc2603263cf7eccbd6e17e66b0ed76%1754336059.551", - "wvware/1.2.9-odr#feeef48fa83e803330bbd3c706718618%1754339134.808372", - "vincentlaucsb-csv-parser/2.3.0#ac67e368e82c9e3da4a663c35e3a1b2f%1754336215.262", - "utfcpp/4.0.4#cd6efc5d62de4e9ee0b5f3abd0df37a9%1754336214.459", - "uchardet/0.0.8#a9026ce72d955751db2dbda62c83c430%1754336213.652", - "pugixml/1.14#c6afdcf73d71858303d8260b0d76ff91%1754336212.85", - "poppler-data/0.4.12-odr#06cdb12e4cab52261a5eb6c7d7dad273%1754339136.6022458", - "poppler/24.08.0-odr#2929132c6c3c67155c6c8f050c923916%1754339139.168313", + "zstd/1.5.7#fde461c0d847a22f16d3066774f61b11%1744114235.235", + "zlib/1.3.1#b8bc2603263cf7eccbd6e17e66b0ed76%1733936244.862", + "xz_utils/5.4.5#b885d1d79c9d30cff3803f7f551dbe66%1724318972.064", + "wvware/1.2.9-odr#feeef48fa83e803330bbd3c706718618%1754432357.893", + "vincentlaucsb-csv-parser/2.3.0#ac67e368e82c9e3da4a663c35e3a1b2f%1718528275.177", + "util-linux-libuuid/2.39.2#637bd312b6310c18190469fae4e1d480%1748076007.711", + "utfcpp/4.0.4#cd6efc5d62de4e9ee0b5f3abd0df37a9%1722968964.685", + "uchardet/0.0.8#a9026ce72d955751db2dbda62c83c430%1703972855.616", + "pugixml/1.14#c6afdcf73d71858303d8260b0d76ff91%1696206310.014", + "poppler-data/0.4.12-odr#06cdb12e4cab52261a5eb6c7d7dad273%1754427610.1551769", + "poppler/24.08.0-odr#2929132c6c3c67155c6c8f050c923916%1754427612.566178", "pixman/0.43.4#0dcdf859941e32fcc7bfb73ea1946a7f%1718828937.421", - "pdf2htmlex/0.18.8.rc1-odr-git-eb5d291#01227c46d8bced149c1e2dd031406e57%1754339135.855592", - "pcre2/10.42#9a35f5089feb875ec61a38eca364ce77%1754336050.663", + "pdf2htmlex/0.18.8.rc1-odr-git-eb5d291#01227c46d8bced149c1e2dd031406e57%1754427609.135946", + "pcre2/10.42#9a35f5089feb875ec61a38eca364ce77%1743524593.693", "openlibm/0.8.3#61d8454cf655e95e0d3d3359bde58ac7%1754339136.456942", "openjpeg/2.5.2#6f7b733e151d1bbf5ed05cbabb846828%1709653017.024", - "nlohmann_json/3.11.3#45828be26eb619a2e04ca517bb7b828d%1754336206.708", - "miniz/3.0.2#bfbce07c6654293cce27ee24129d2df7%1754336205.867", + "nlohmann_json/3.11.3#45828be26eb619a2e04ca517bb7b828d%1701220705.259", + "miniz/3.0.2#bfbce07c6654293cce27ee24129d2df7%1743673472.805", "lzo/2.10#5725914235423c771cb1c6b607109b45%1685099992.798", - "libxml2/2.12.7#1c4d20b7ab8b618ce699733723ba4df6%1754336034.974", - "libpng/1.6.48#dd0fc04a42b9a23bce065545a81d4847%1754336033.871", - "libjpeg/9f#8edfe2699565c80c825d0256002504ff%1754336032.766", - "libiconv/1.17#1e65319e945f2d31941a9d28cc13c058%1754336113.172", - "libgsf/1.14.52#9b22c41267004c80ba5cde5d01e23a24%1754339136.153343", - "libgettext/0.22#35d2811b2dd27a98f69e4daa86ca2000%1754336030.102", - "libffi/3.4.8#06926dca35bcf8e321fcc24def952cde%1754336028.993", - "libelf/0.8.13#ba59bbc89757ed62cfd7690a73bf81be%1754336027.877", + "libxml2/2.12.7#1c4d20b7ab8b618ce699733723ba4df6%1721306327.767", + "libselinux/3.6#5a78ff6ae5034eeaac8da723361a8ce4%1748075177.52", + "libpng/1.6.48#dd0fc04a42b9a23bce065545a81d4847%1746141949.748", + "libmagic/5.45#791d5bad38d33272bb120994a198b1ac%1727273086.09", + "libjpeg/9f#8edfe2699565c80c825d0256002504ff%1723665907.087", + "libiconv/1.17#1e65319e945f2d31941a9d28cc13c058%1751451666.321", + "libgsf/1.14.52#9b22c41267004c80ba5cde5d01e23a24%1754427609.682325", + "libgettext/0.22#35d2811b2dd27a98f69e4daa86ca2000%1714393058.647", + "libffi/3.4.8#06926dca35bcf8e321fcc24def952cde%1748531860.405", + "libelf/0.8.13#ba59bbc89757ed62cfd7690a73bf81be%1741781951.327", "lcms/2.16#fb083506ff40fd950c9e5c39df8bed54%1703969656.459", - "gtest/1.14.0#f8f0757a574a8dd747d16af62d6eb1b7%1754336203.113", - "glib/2.81.0-odr#ddf445d5af468f972978af93c44d26e1%1754339135.261312", - "giflib/5.2.2#3923fc0f7ffec2f0bdbdee9b548f9248%1754336019.711", - "freetype/2.13.2#5d2563803c8558d4ef47271a82c73d20%1754336015.019", - "fontforge/20240423-git#525c82a5e57385c14b647b966e86ea58%1754339139.32232", - "fontconfig/2.15.0-odr#2febbef44ca469204c8ff38cfd21db59%1754339136.30191", + "gtest/1.14.0#f8f0757a574a8dd747d16af62d6eb1b7%1743410807.169", + "glib/2.81.0-odr#ddf445d5af468f972978af93c44d26e1%1754427608.367101", + "giflib/5.2.2#3923fc0f7ffec2f0bdbdee9b548f9248%1731663927.701", + "freetype/2.13.2#5d2563803c8558d4ef47271a82c73d20%1728736671.752", + "fontforge/20240423-git#525c82a5e57385c14b647b966e86ea58%1754432335.33", + "fontconfig/2.15.0-odr#2febbef44ca469204c8ff38cfd21db59%1754427609.845061", "expat/2.6.3#39b80d3109fbe578fddfe4951f0b1d57%1725469045.298", - "cryptopp/8.9.0#fe3de584c28c0ecc938a1671e3f1bd72%1754336202.171", - "cpp-httplib/0.16.3#7aa89fbb81ffd19539a49fc132502966%1754336201.354", - "cairo/1.18.0-odr#c1d0ad14a91ad6d161e756d54277ac13%1754339136.007648", - "bzip2/1.0.8#00b4a4658791c1f06914e087f0e792f5%1754336009.237", - "brotli/1.1.0#406ce8f1c997f4ef7852fa01ff85ef9f%1754336007.831", + "cryptopp/8.9.0#fe3de584c28c0ecc938a1671e3f1bd72%1731421245.374", + "cpp-httplib/0.16.3#7aa89fbb81ffd19539a49fc132502966%1748426320.106", + "cairo/1.18.0-odr#c1d0ad14a91ad6d161e756d54277ac13%1754427609.450017", + "bzip2/1.0.8#00b4a4658791c1f06914e087f0e792f5%1744702067.178", + "brotli/1.1.0#406ce8f1c997f4ef7852fa01ff85ef9f%1743158659.041", "boost/1.86.0#cd839a2082585255010f9e82eea94c7f%1728027203.247", - "argon2/20190702-odr#965901884bc82ec8a7c0a1305d42c127%1754339135.113191", - "libselinux/3.6#5a78ff6ae5034eeaac8da723361a8ce4%1748075177.52", - "util-linux-libuuid/2.39.2#637bd312b6310c18190469fae4e1d480%1748076007.711" + "argon2/20190702-odr#965901884bc82ec8a7c0a1305d42c127%1754427608.210625" ], "build_requires": [ - "zlib/1.3.1#b8bc2603263cf7eccbd6e17e66b0ed76%1754336059.551", - "pkgconf/2.2.0#6462942a22803086372db44689ba825f%1754336122.883", - "pkgconf/2.1.0#27f44583701117b571307cf5b5fe5605%1754336055.341", - "pkgconf/2.0.3#f996677e96e61e6552d85e83756c328b%1754336054.459", - "pcre2/10.42#9a35f5089feb875ec61a38eca364ce77%1754336050.663", - "ninja/1.13.0#53ff096207a5599ced46a633271b3cef%1754336120.489", - "meson/1.4.0#2262941cc8fbb0099dd0c196ca2a6c01%1754336038.389", - "meson/1.3.2#26ce8a76a36cc275cdfee1d757bc6561%1754336037.623", - "meson/1.2.2#21b73818ba96d9eea465b310b5bbc993%1754336037.251", + "zlib/1.3.1#b8bc2603263cf7eccbd6e17e66b0ed76%1733936244.862", + "pkgconf/2.2.0#6462942a22803086372db44689ba825f%1713364853.749", + "pkgconf/2.1.0#27f44583701117b571307cf5b5fe5605%1701537936.436", + "pkgconf/2.0.3#f996677e96e61e6552d85e83756c328b%1696606182.229", + "pcre2/10.42#9a35f5089feb875ec61a38eca364ce77%1743524593.693", + "ninja/1.13.0#53ff096207a5599ced46a633271b3cef%1751046277.036", + "meson/1.4.0#2262941cc8fbb0099dd0c196ca2a6c01%1726730116.631", + "meson/1.3.2#26ce8a76a36cc275cdfee1d757bc6561%1726730118.251", + "meson/1.2.2#21b73818ba96d9eea465b310b5bbc993%1726730120.212", "meson/1.2.1#f2b0c7763308df8e33172744dace8845%1726730117.905", - "m4/1.4.19#b38ced39a01e31fef5435bc634461fd2%1754336036.417", - "libtool/2.4.7#a182d7ce8d4c346a19dbd4a5d532ef68%1754336116.018", - "libiconv/1.17#1e65319e945f2d31941a9d28cc13c058%1754336113.172", - "libgettext/0.22#35d2811b2dd27a98f69e4daa86ca2000%1754336030.102", - "libffi/3.4.8#06926dca35bcf8e321fcc24def952cde%1754336028.993", - "libelf/0.8.13#ba59bbc89757ed62cfd7690a73bf81be%1754336027.877", - "gtk-doc-stub/cci.20181216#09072d684ce1458596b44a30a747494c%1754336106.527", - "gperf/3.1#1d622ad9717e9348ed3685c9994ad0b9%1709324989.76", - "gnu-config/cci.20210814#dc430d754f465e8c74463019672fb97b%1754336027.04", - "glib/2.81.0-odr#ddf445d5af468f972978af93c44d26e1%1754339135.261312", - "gettext/0.22.5#a1f31cc77dee0345699745ef39686dd0%1754336091.687", - "cmake/3.31.8#dd6e07c418afc4b30cb1c21584dccc49%1754336200.956", - "bzip2/1.0.8#00b4a4658791c1f06914e087f0e792f5%1754336009.237", - "automake/1.16.5#058bda3e21c36c9aa8425daf3c1faf50%1754336006.921", - "autoconf/2.71#f9307992909d7fb3df459340f1932809%1754336006.057", + "m4/1.4.19#b38ced39a01e31fef5435bc634461fd2%1700758725.451", + "libtool/2.4.7#a182d7ce8d4c346a19dbd4a5d532ef68%1742900203.747", "libselinux/3.6#5a78ff6ae5034eeaac8da723361a8ce4%1748075177.52", - "flex/2.6.4#e35bc44b3fcbcd661e0af0dc5b5b1ad4%1748075168.656" + "libiconv/1.17#1e65319e945f2d31941a9d28cc13c058%1751451666.321", + "libgettext/0.22#35d2811b2dd27a98f69e4daa86ca2000%1714393058.647", + "libffi/3.4.8#06926dca35bcf8e321fcc24def952cde%1748531860.405", + "libelf/0.8.13#ba59bbc89757ed62cfd7690a73bf81be%1741781951.327", + "gtk-doc-stub/cci.20181216#09072d684ce1458596b44a30a747494c%1687277608.37", + "gperf/3.1#1d622ad9717e9348ed3685c9994ad0b9%1709324989.76", + "gnu-config/cci.20210814#dc430d754f465e8c74463019672fb97b%1701248168.479", + "glib/2.81.0-odr#ddf445d5af468f972978af93c44d26e1%1754427608.367101", + "gettext/0.22.5#a1f31cc77dee0345699745ef39686dd0%1750252839.982", + "flex/2.6.4#e35bc44b3fcbcd661e0af0dc5b5b1ad4%1748075168.656", + "cmake/3.31.8#dd6e07c418afc4b30cb1c21584dccc49%1750223587.75", + "bzip2/1.0.8#00b4a4658791c1f06914e087f0e792f5%1744702067.178", + "automake/1.16.5#058bda3e21c36c9aa8425daf3c1faf50%1688481772.751", + "autoconf/2.71#f9307992909d7fb3df459340f1932809%1711983104.648" ], "python_requires": [], "config_requires": [] diff --git a/conanfile.py b/conanfile.py index 738ec7cf6..7669c2da8 100644 --- a/conanfile.py +++ b/conanfile.py @@ -19,12 +19,14 @@ class OpenDocumentCoreConan(ConanFile): "fPIC": [True, False], "with_pdf2htmlEX": [True, False], "with_wvWare": [True, False], + "with_libmagic": [True, False], } default_options = { "shared": False, "fPIC": True, "with_pdf2htmlEX": True, "with_wvWare": True, + "with_libmagic": True, } exports_sources = ["cli/*", "cmake/*", "resources/dist/*", "src/*", "CMakeLists.txt"] @@ -49,6 +51,8 @@ def requirements(self): self.requires("wvware/1.2.9-odr") self.requires("cpp-httplib/0.16.3") self.requires("argon2/20190702-odr") + if self.options.get_safe("with_libmagic", False): + self.requires("libmagic/5.45") def build_requirements(self): self.test_requires("gtest/1.14.0") @@ -67,6 +71,7 @@ def generate(self): tc.variables["ODR_TEST"] = False tc.variables["WITH_PDF2HTMLEX"] = self.options.get_safe("with_pdf2htmlEX", False) tc.variables["WITH_WVWARE"] = self.options.get_safe("with_wvWare", False) + tc.variables["WITH_LIBMAGIC"] = self.options.get_safe("with_libmagic", False) # Get runenv info, exported by package_info() of dependencies # We need to obtain PDF2HTMLEX_DATA_DIR, POPPLER_DATA_DIR, FONTCONFIG_PATH and WVDATADIR diff --git a/src/odr/global_params.cpp b/src/odr/global_params.cpp index cd7f67ebb..5a9a3e52c 100644 --- a/src/odr/global_params.cpp +++ b/src/odr/global_params.cpp @@ -41,6 +41,10 @@ const std::string &GlobalParams::pdf2htmlex_data_path() { return instance().m_pdf2htmlex_data_path; } +const std::string &GlobalParams::libmagic_data_path() { + return instance().m_libmagic_data_path; +} + void GlobalParams::set_odr_core_data_path(const std::string &path) { instance().m_odr_core_data_path = path; } @@ -62,10 +66,15 @@ void GlobalParams::set_pdf2htmlex_data_path(const std::string &path) { instance().m_pdf2htmlex_data_path = path; } +void GlobalParams::set_libmagic_data_path(const std::string &path) { + instance().m_libmagic_data_path = path; +} + GlobalParams::GlobalParams() : m_odr_core_data_path{internal::project_info::odr_data_path()}, m_fontconfig_data_path{internal::project_info::fontconfig_data_path()}, m_poppler_data_path{internal::project_info::poppler_data_path()}, - m_pdf2htmlex_data_path{internal::project_info::pdf2htmlex_data_path()} {} + m_pdf2htmlex_data_path{internal::project_info::pdf2htmlex_data_path()}, + m_libmagic_data_path{internal::project_info::libmagic_data_path()} {} } // namespace odr diff --git a/src/odr/global_params.hpp b/src/odr/global_params.hpp index 9888be173..65810c712 100644 --- a/src/odr/global_params.hpp +++ b/src/odr/global_params.hpp @@ -10,11 +10,13 @@ class GlobalParams { static const std::string &fontconfig_data_path(); static const std::string &poppler_data_path(); static const std::string &pdf2htmlex_data_path(); + static const std::string &libmagic_data_path(); static void set_odr_core_data_path(const std::string &path); static void set_fontconfig_data_path(const std::string &path); static void set_poppler_data_path(const std::string &path); static void set_pdf2htmlex_data_path(const std::string &path); + static void set_libmagic_data_path(const std::string &path); private: static GlobalParams &instance(); @@ -25,6 +27,7 @@ class GlobalParams { std::string m_fontconfig_data_path; std::string m_poppler_data_path; std::string m_pdf2htmlex_data_path; + std::string m_libmagic_data_path; }; } // namespace odr diff --git a/src/odr/internal/libmagic/libmagic.cpp b/src/odr/internal/libmagic/libmagic.cpp new file mode 100644 index 000000000..ff9bb8b7f --- /dev/null +++ b/src/odr/internal/libmagic/libmagic.cpp @@ -0,0 +1,48 @@ +#include + +#include + +#include + +namespace odr::internal { + +namespace { + +void magic_deleter(const magic_t magic_cookie) { + if (magic_cookie != nullptr) { + magic_close(magic_cookie); + } +} + +magic_t get_magic_cookie() { + using Holder = + std::unique_ptr, decltype(&magic_deleter)>; + static Holder magic_cookie(nullptr, &magic_deleter); + + if (magic_cookie) { + return magic_cookie.get(); + } + + magic_cookie = Holder(magic_open(MAGIC_MIME_TYPE), &magic_deleter); + if (!magic_cookie) { + throw std::runtime_error("magic_open failed"); + } + if (magic_load(magic_cookie.get(), + GlobalParams::libmagic_data_path().c_str()) == 0) { + return magic_cookie.get(); + } + if (magic_load(magic_cookie.get(), nullptr) == 0) { + return magic_cookie.get(); + } + magic_cookie.reset(); + throw std::runtime_error("magic_load failed"); +} + +} // namespace + +const char *libmagic::mime_type(const std::string &path) { + const magic_t magic_cookie = get_magic_cookie(); + return magic_file(magic_cookie, path.c_str()); +} + +} // namespace odr::internal diff --git a/src/odr/internal/libmagic/libmagic.hpp b/src/odr/internal/libmagic/libmagic.hpp new file mode 100644 index 000000000..e628992fc --- /dev/null +++ b/src/odr/internal/libmagic/libmagic.hpp @@ -0,0 +1,7 @@ +#pragma once + +#include + +namespace odr::internal::libmagic { +const char *mime_type(const std::string &path); +} diff --git a/src/odr/internal/magic.cpp b/src/odr/internal/magic.cpp index 1831fb4b1..43ffac410 100644 --- a/src/odr/internal/magic.cpp +++ b/src/odr/internal/magic.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -86,4 +87,12 @@ FileType magic::file_type(const File &file) { return file_type(*file.stream()); } +const char *magic::mime_type(const std::string &path) { +#ifdef ODR_USE_LIBMAGIC + return libmagic::mime_type(path); +#else + return nullptr; +#endif +} + } // namespace odr::internal diff --git a/src/odr/internal/magic.hpp b/src/odr/internal/magic.hpp index c2e804015..a68c7ea42 100644 --- a/src/odr/internal/magic.hpp +++ b/src/odr/internal/magic.hpp @@ -15,6 +15,8 @@ class File; namespace odr::internal::magic { FileType file_type(const std::string &magic); FileType file_type(std::istream &in); -FileType file_type(const internal::abstract::File &file); +FileType file_type(const abstract::File &file); FileType file_type(const File &file); + +const char *mime_type(const std::string &path); } // namespace odr::internal::magic diff --git a/src/odr/internal/project_info.cpp.in b/src/odr/internal/project_info.cpp.in index 3628b823e..f06149abf 100644 --- a/src/odr/internal/project_info.cpp.in +++ b/src/odr/internal/project_info.cpp.in @@ -28,6 +28,14 @@ bool project_info::has_pdf2htmlex() noexcept{ #endif } +bool project_info::has_libmagic() noexcept{ +#ifdef ODR_WITH_LIBMAGIC + return true; +#else + return false; +#endif +} + const char *project_info::odr_data_path() noexcept { return "${ODR_DATA_PATH}"; } @@ -44,4 +52,8 @@ const char *project_info::pdf2htmlex_data_path() noexcept{ return "${PDF2HTMLEX_DATA_DIR}"; } +const char *project_info::libmagic_data_path() noexcept{ + return "${LIBMAGIC_DATA_DIR}"; +} + } // namespace odr::internal diff --git a/src/odr/internal/project_info.hpp b/src/odr/internal/project_info.hpp index 490039504..8a672ea36 100644 --- a/src/odr/internal/project_info.hpp +++ b/src/odr/internal/project_info.hpp @@ -7,9 +7,12 @@ bool is_debug() noexcept; bool has_wvware() noexcept; bool has_pdf2htmlex() noexcept; +bool has_libmagic() noexcept; + const char *odr_data_path() noexcept; const char *fontconfig_data_path() noexcept; const char *poppler_data_path() noexcept; const char *pdf2htmlex_data_path() noexcept; +const char *libmagic_data_path() noexcept; } // namespace odr::internal::project_info From d7c7671e1c896765a908ebcfe80053bdc71de3ac Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sun, 19 Oct 2025 15:01:46 +0200 Subject: [PATCH 2/8] more mimetypes --- src/odr/file.cpp | 13 ++ src/odr/file.hpp | 14 +- src/odr/internal/abstract/file.hpp | 1 + src/odr/internal/cfb/cfb_file.cpp | 9 +- src/odr/internal/cfb/cfb_file.hpp | 1 + src/odr/internal/common/image_file.cpp | 2 + src/odr/internal/common/image_file.hpp | 1 + src/odr/internal/csv/csv_file.cpp | 10 +- src/odr/internal/csv/csv_file.hpp | 1 + src/odr/internal/json/json_file.cpp | 7 +- src/odr/internal/json/json_file.hpp | 1 + src/odr/internal/magic.cpp | 5 +- src/odr/internal/magic.hpp | 2 +- src/odr/internal/odf/odf_file.cpp | 4 + src/odr/internal/odf/odf_file.hpp | 1 + src/odr/internal/odf/odf_meta.cpp | 18 ++- src/odr/internal/oldms/oldms_file.cpp | 25 +++- src/odr/internal/oldms/oldms_file.hpp | 1 + .../oldms_wvware/wvware_oldms_file.cpp | 5 + .../oldms_wvware/wvware_oldms_file.hpp | 1 + src/odr/internal/ooxml/ooxml_file.cpp | 4 + src/odr/internal/ooxml/ooxml_file.hpp | 1 + src/odr/internal/ooxml/ooxml_meta.cpp | 27 ++-- src/odr/internal/ooxml/ooxml_meta.hpp | 2 +- src/odr/internal/pdf/pdf_file.cpp | 4 + src/odr/internal/pdf/pdf_file.hpp | 1 + .../internal/pdf_poppler/poppler_pdf_file.cpp | 5 + .../internal/pdf_poppler/poppler_pdf_file.hpp | 1 + src/odr/internal/svm/svm_file.cpp | 9 +- src/odr/internal/svm/svm_file.hpp | 1 + src/odr/internal/text/text_file.cpp | 10 +- src/odr/internal/text/text_file.hpp | 1 + src/odr/internal/zip/zip_file.cpp | 8 +- src/odr/internal/zip/zip_file.hpp | 1 + src/odr/odr.cpp | 137 ++++++++++++++++++ src/odr/odr.hpp | 15 ++ 36 files changed, 302 insertions(+), 47 deletions(-) diff --git a/src/odr/file.cpp b/src/odr/file.cpp index b598b8123..711f5c446 100644 --- a/src/odr/file.cpp +++ b/src/odr/file.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -27,6 +28,12 @@ FileMeta::FileMeta(const FileType type, const bool password_encrypted, : type{type}, password_encrypted{password_encrypted}, document_meta{document_meta} {} +FileMeta::FileMeta(const FileType type, const std::string_view mimetype, + const bool password_encrypted, + const std::optional document_meta) + : type{type}, mimetype{mimetype}, password_encrypted{password_encrypted}, + document_meta{document_meta} {} + File::File() = default; File::File(std::shared_ptr impl) @@ -66,6 +73,12 @@ std::vector DecodedFile::list_file_types(const std::string &path, std::make_shared(path), logger); } +std::string_view DecodedFile::mimetype(const std::string &path, + Logger &logger) { + (void)logger; + return internal::magic::mimetype(path); +} + std::vector DecodedFile::list_decoder_engines(const FileType as) { return internal::open_strategy::list_decoder_engines(as); diff --git a/src/odr/file.hpp b/src/odr/file.hpp index 37c1f14ab..b4df693e9 100644 --- a/src/odr/file.hpp +++ b/src/odr/file.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace odr::internal::abstract { @@ -140,10 +141,14 @@ struct DocumentMeta final { /// @brief Meta information about a file. struct FileMeta final { FileMeta(); + [[deprecated]] FileMeta(FileType type, bool password_encrypted, std::optional document_meta); + FileMeta(FileType type, std::string_view mimetype, bool password_encrypted, + std::optional document_meta); FileType type{FileType::unknown}; + std::string_view mimetype; bool password_encrypted{false}; std::optional document_meta; }; @@ -175,9 +180,12 @@ class File final { /// @brief Represents a decoded file. class DecodedFile { public: - static std::vector list_file_types(const std::string &path, - Logger &logger = Logger::null()); - static std::vector list_decoder_engines(FileType as); + [[nodiscard]] static std::vector + list_file_types(const std::string &path, Logger &logger = Logger::null()); + [[nodiscard]] static std::vector + list_decoder_engines(FileType as); + [[nodiscard]] static std::string_view + mimetype(const std::string &path, Logger &logger = Logger::null()); explicit DecodedFile(std::shared_ptr impl); explicit DecodedFile(const File &file, Logger &logger = Logger::null()); diff --git a/src/odr/internal/abstract/file.hpp b/src/odr/internal/abstract/file.hpp index 9906fb69e..b41eb209f 100644 --- a/src/odr/internal/abstract/file.hpp +++ b/src/odr/internal/abstract/file.hpp @@ -38,6 +38,7 @@ class DecodedFile { [[nodiscard]] virtual FileCategory file_category() const noexcept = 0; [[nodiscard]] virtual FileMeta file_meta() const noexcept = 0; [[nodiscard]] virtual DecoderEngine decoder_engine() const noexcept = 0; + [[nodiscard]] virtual std::string_view mimetype() const noexcept = 0; [[nodiscard]] virtual bool password_encrypted() const noexcept { return false; diff --git a/src/odr/internal/cfb/cfb_file.cpp b/src/odr/internal/cfb/cfb_file.cpp index 0e9677793..1bd6af703 100644 --- a/src/odr/internal/cfb/cfb_file.cpp +++ b/src/odr/internal/cfb/cfb_file.cpp @@ -17,15 +17,18 @@ FileType CfbFile::file_type() const noexcept { } FileMeta CfbFile::file_meta() const noexcept { - FileMeta meta; - meta.type = file_type(); - return meta; + return {FileType::compound_file_binary_format, "application/x-cfb", false, + std::nullopt}; } DecoderEngine CfbFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +std::string_view CfbFile::mimetype() const noexcept { + return "application/x-cfb"; +} + bool CfbFile::is_decodable() const noexcept { return true; } std::shared_ptr CfbFile::archive() const { diff --git a/src/odr/internal/cfb/cfb_file.hpp b/src/odr/internal/cfb/cfb_file.hpp index 14300105c..a7dd129a0 100644 --- a/src/odr/internal/cfb/cfb_file.hpp +++ b/src/odr/internal/cfb/cfb_file.hpp @@ -26,6 +26,7 @@ class CfbFile final : public abstract::ArchiveFile { [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/internal/common/image_file.cpp b/src/odr/internal/common/image_file.cpp index 475c015ec..f3f520cb4 100644 --- a/src/odr/internal/common/image_file.cpp +++ b/src/odr/internal/common/image_file.cpp @@ -20,6 +20,8 @@ DecoderEngine ImageFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +std::string_view ImageFile::mimetype() const noexcept { return ""; } + bool ImageFile::is_decodable() const noexcept { return false; } std::shared_ptr ImageFile::image() const { diff --git a/src/odr/internal/common/image_file.hpp b/src/odr/internal/common/image_file.hpp index 87a7b8178..0c70163d6 100644 --- a/src/odr/internal/common/image_file.hpp +++ b/src/odr/internal/common/image_file.hpp @@ -13,6 +13,7 @@ class ImageFile final : public abstract::ImageFile { [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/internal/csv/csv_file.cpp b/src/odr/internal/csv/csv_file.cpp index 82c61e18f..19b63d062 100644 --- a/src/odr/internal/csv/csv_file.cpp +++ b/src/odr/internal/csv/csv_file.cpp @@ -20,14 +20,16 @@ FileType CsvFile::file_type() const noexcept { return FileType::comma_separated_values; } -FileMeta CsvFile::file_meta() const noexcept { - return {FileType::comma_separated_values, false, {}}; -} - DecoderEngine CsvFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +std::string_view CsvFile::mimetype() const noexcept { return "text/csv"; } + +FileMeta CsvFile::file_meta() const noexcept { + return {FileType::comma_separated_values, "text/csv", false, std::nullopt}; +} + bool CsvFile::is_decodable() const noexcept { return false; } } // namespace odr::internal::csv diff --git a/src/odr/internal/csv/csv_file.hpp b/src/odr/internal/csv/csv_file.hpp index 838af7b94..315471028 100644 --- a/src/odr/internal/csv/csv_file.hpp +++ b/src/odr/internal/csv/csv_file.hpp @@ -17,6 +17,7 @@ class CsvFile final : public abstract::TextFile { [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/internal/json/json_file.cpp b/src/odr/internal/json/json_file.cpp index 76dbbaaf7..78df9d119 100644 --- a/src/odr/internal/json/json_file.cpp +++ b/src/odr/internal/json/json_file.cpp @@ -21,13 +21,18 @@ FileType JsonFile::file_type() const noexcept { } FileMeta JsonFile::file_meta() const noexcept { - return {FileType::javascript_object_notation, false, {}}; + return {FileType::javascript_object_notation, "application/json", false, + std::nullopt}; } DecoderEngine JsonFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +std::string_view JsonFile::mimetype() const noexcept { + return "application/json"; +} + bool JsonFile::is_decodable() const noexcept { return false; } } // namespace odr::internal::json diff --git a/src/odr/internal/json/json_file.hpp b/src/odr/internal/json/json_file.hpp index 5a7c4c973..db36bcbd8 100644 --- a/src/odr/internal/json/json_file.hpp +++ b/src/odr/internal/json/json_file.hpp @@ -17,6 +17,7 @@ class JsonFile final : public abstract::TextFile { [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/internal/magic.cpp b/src/odr/internal/magic.cpp index 43ffac410..e6853f275 100644 --- a/src/odr/internal/magic.cpp +++ b/src/odr/internal/magic.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include @@ -87,11 +88,11 @@ FileType magic::file_type(const File &file) { return file_type(*file.stream()); } -const char *magic::mime_type(const std::string &path) { +std::string_view magic::mimetype(const std::string &path) { #ifdef ODR_USE_LIBMAGIC return libmagic::mime_type(path); #else - return nullptr; + return odr::mimetype_by_file_type(magic::file_type(path)); #endif } diff --git a/src/odr/internal/magic.hpp b/src/odr/internal/magic.hpp index a68c7ea42..85fb4e8e4 100644 --- a/src/odr/internal/magic.hpp +++ b/src/odr/internal/magic.hpp @@ -18,5 +18,5 @@ FileType file_type(std::istream &in); FileType file_type(const abstract::File &file); FileType file_type(const File &file); -const char *mime_type(const std::string &path); +std::string_view mimetype(const std::string &path); } // namespace odr::internal::magic diff --git a/src/odr/internal/odf/odf_file.cpp b/src/odr/internal/odf/odf_file.cpp index f1fbfa148..5d41a3992 100644 --- a/src/odr/internal/odf/odf_file.cpp +++ b/src/odr/internal/odf/odf_file.cpp @@ -46,6 +46,10 @@ DecoderEngine OpenDocumentFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +std::string_view OpenDocumentFile::mimetype() const noexcept { + return m_file_meta.mimetype; +} + DocumentType OpenDocumentFile::document_type() const { return m_file_meta.document_meta.value().document_type; } diff --git a/src/odr/internal/odf/odf_file.hpp b/src/odr/internal/odf/odf_file.hpp index d2075a84c..50a893483 100644 --- a/src/odr/internal/odf/odf_file.hpp +++ b/src/odr/internal/odf/odf_file.hpp @@ -28,6 +28,7 @@ class OpenDocumentFile final : public virtual abstract::DocumentFile { [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] DocumentType document_type() const override; [[nodiscard]] DocumentMeta document_meta() const override; diff --git a/src/odr/internal/odf/odf_meta.cpp b/src/odr/internal/odf/odf_meta.cpp index cb7614463..b75386672 100644 --- a/src/odr/internal/odf/odf_meta.cpp +++ b/src/odr/internal/odf/odf_meta.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include @@ -18,7 +17,8 @@ namespace odr::internal::odf { namespace { -bool lookup_file_type(const std::string &mime_type, FileType &file_type) { +bool lookup_file_type(const std::string &mimetype_in, FileType &file_type, + std::string_view &mimetype_out) { // https://www.openoffice.org/framework/documentation/mimetypes/mimetypes.html static const std::unordered_map MIME_TYPES = { {"application/vnd.oasis.opendocument.text", FileType::opendocument_text}, @@ -53,8 +53,14 @@ bool lookup_file_type(const std::string &mime_type, FileType &file_type) { {"application/vnd.sun.xml.draw.template", FileType::opendocument_graphics}, }; - return util::map::lookup_default(MIME_TYPES, mime_type, file_type, - FileType::unknown); + if (const auto it = MIME_TYPES.find(mimetype_in); it != MIME_TYPES.end()) { + file_type = it->second; + mimetype_out = it->first; + } else { + file_type = FileType::unknown; + mimetype_out = "application/octet-stream"; + } + return false; } } // namespace @@ -74,7 +80,7 @@ FileMeta parse_file_meta(const abstract::ReadableFilesystem &filesystem, if (filesystem.is_file(AbsPath("/mimetype"))) { const std::string mimeType = util::stream::read(*filesystem.open(AbsPath("/mimetype"))->stream()); - lookup_file_type(mimeType, result.type); + lookup_file_type(mimeType, result.type, result.mimetype); } pugi::xml_document manifest_xml; @@ -92,7 +98,7 @@ FileMeta parse_file_meta(const abstract::ReadableFilesystem &filesystem, path.root() && e.node().attribute("manifest:media-type")) { const std::string mimeType = e.node().attribute("manifest:media-type").as_string(); - lookup_file_type(mimeType, result.type); + lookup_file_type(mimeType, result.type, result.mimetype); } } if (!manifest->select_nodes("//manifest:encryption-data").empty()) { diff --git a/src/odr/internal/oldms/oldms_file.cpp b/src/odr/internal/oldms/oldms_file.cpp index 75bd15130..759d2451e 100644 --- a/src/odr/internal/oldms/oldms_file.cpp +++ b/src/odr/internal/oldms/oldms_file.cpp @@ -11,24 +11,33 @@ namespace odr::internal::oldms { namespace { FileMeta parse_meta(const abstract::ReadableFilesystem &storage) { - static const std::unordered_map types = { + struct Variant { + FileType type{FileType::unknown}; + std::string_view mimetype; + }; + + static const std::unordered_map types = { // MS-DOC: The "WordDocument" stream MUST be present in the file. // https://msdn.microsoft.com/en-us/library/dd926131(v=office.12).aspx - {AbsPath("/WordDocument"), FileType::legacy_word_document}, + {AbsPath("/WordDocument"), + {FileType::legacy_word_document, "application/msword"}}, // MS-PPT: The "PowerPoint Document" stream MUST be present in the file. // https://msdn.microsoft.com/en-us/library/dd911009(v=office.12).aspx {AbsPath("/PowerPoint Document"), - FileType::legacy_powerpoint_presentation}, + {FileType::legacy_powerpoint_presentation, + "application/vnd.ms-powerpoint"}}, // MS-XLS: The "Workbook" stream MUST be present in the file. // https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-ppt/1fc22d56-28f9-4818-bd45-67c2bf721ccf - {AbsPath("/Workbook"), FileType::legacy_excel_worksheets}, + {AbsPath("/Workbook"), + {FileType::legacy_excel_worksheets, "application/vnd.ms-excel"}}, }; FileMeta result; - for (const auto &[path, type] : types) { + for (const auto &[path, variant] : types) { if (storage.is_file(path)) { - result.type = type; + result.type = variant.type; + result.mimetype = variant.mimetype; break; } } @@ -61,6 +70,10 @@ DecoderEngine LegacyMicrosoftFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +std::string_view LegacyMicrosoftFile::mimetype() const noexcept { + return m_file_meta.mimetype; +} + DocumentType LegacyMicrosoftFile::document_type() const { return m_file_meta.document_meta.value().document_type; } diff --git a/src/odr/internal/oldms/oldms_file.hpp b/src/odr/internal/oldms/oldms_file.hpp index 0206f084e..a9686ba83 100644 --- a/src/odr/internal/oldms/oldms_file.hpp +++ b/src/odr/internal/oldms/oldms_file.hpp @@ -25,6 +25,7 @@ class LegacyMicrosoftFile final : public abstract::DocumentFile { [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] DocumentType document_type() const override; [[nodiscard]] DocumentMeta document_meta() const override; diff --git a/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp b/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp index f5f813b86..289c981ce 100644 --- a/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp +++ b/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp @@ -54,6 +54,7 @@ void WvWareLegacyMicrosoftFile::open() { int ret = wvInitParser_gsf(&m_parser_state->ps, m_parser_state->gsf_input); m_file_meta.type = FileType::legacy_word_document; + m_file_meta.mimetype = "application/msword"; m_file_meta.document_meta = DocumentMeta(); m_file_meta.document_meta->document_type = DocumentType::text; @@ -94,6 +95,10 @@ DecoderEngine WvWareLegacyMicrosoftFile::decoder_engine() const noexcept { return DecoderEngine::wvware; } +std::string_view WvWareLegacyMicrosoftFile::mimetype() const noexcept { + return "application/msword"; +} + DocumentType WvWareLegacyMicrosoftFile::document_type() const { return DocumentType::text; } diff --git a/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp b/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp index ea183036b..2510f70d0 100644 --- a/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp +++ b/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp @@ -26,6 +26,7 @@ class WvWareLegacyMicrosoftFile final : public abstract::DocumentFile { [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] DocumentType document_type() const override; [[nodiscard]] DocumentMeta document_meta() const override; diff --git a/src/odr/internal/ooxml/ooxml_file.cpp b/src/odr/internal/ooxml/ooxml_file.cpp index 52cff006f..eaa0bfff4 100644 --- a/src/odr/internal/ooxml/ooxml_file.cpp +++ b/src/odr/internal/ooxml/ooxml_file.cpp @@ -39,6 +39,10 @@ DecoderEngine OfficeOpenXmlFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +std::string_view OfficeOpenXmlFile::mimetype() const noexcept { + return m_file_meta.mimetype; +} + DocumentType OfficeOpenXmlFile::document_type() const { return m_file_meta.document_meta.value().document_type; } diff --git a/src/odr/internal/ooxml/ooxml_file.hpp b/src/odr/internal/ooxml/ooxml_file.hpp index 77fe81295..4355386de 100644 --- a/src/odr/internal/ooxml/ooxml_file.hpp +++ b/src/odr/internal/ooxml/ooxml_file.hpp @@ -27,6 +27,7 @@ class OfficeOpenXmlFile final : public abstract::DocumentFile { [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] DocumentType document_type() const override; [[nodiscard]] DocumentMeta document_meta() const override; diff --git a/src/odr/internal/ooxml/ooxml_meta.cpp b/src/odr/internal/ooxml/ooxml_meta.cpp index 1e739dd06..f01d8ab4c 100644 --- a/src/odr/internal/ooxml/ooxml_meta.cpp +++ b/src/odr/internal/ooxml/ooxml_meta.cpp @@ -10,19 +10,25 @@ namespace odr::internal::ooxml { -FileMeta parse_file_meta(abstract::ReadableFilesystem &filesystem) { +FileMeta parse_file_meta(const abstract::ReadableFilesystem &filesystem) { struct TypeInfo { - FileType file_type; - DocumentType document_type; + FileType file_type{FileType::unknown}; + DocumentType document_type{DocumentType::unknown}; + std::string_view mimetype; }; static const std::unordered_map types = { {AbsPath("/word/document.xml"), - {FileType::office_open_xml_document, DocumentType::text}}, + {FileType::office_open_xml_document, DocumentType::text, + "application/" + "vnd.openxmlformats-officedocument.wordprocessingml.document"}}, {AbsPath("/ppt/presentation.xml"), - {FileType::office_open_xml_presentation, DocumentType::presentation}}, + {FileType::office_open_xml_presentation, DocumentType::presentation, + "application/" + "vnd.openxmlformats-officedocument.presentationml.presentation"}}, {AbsPath("/xl/workbook.xml"), - {FileType::office_open_xml_workbook, DocumentType::spreadsheet}}, + {FileType::office_open_xml_workbook, DocumentType::spreadsheet, + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"}}, }; FileMeta result; @@ -35,10 +41,11 @@ FileMeta parse_file_meta(abstract::ReadableFilesystem &filesystem) { return result; } - for (auto &&t : types) { - if (filesystem.is_file(t.first)) { - result.type = t.second.file_type; - result.document_meta->document_type = t.second.document_type; + for (const auto &[path, info] : types) { + if (filesystem.is_file(path)) { + result.type = info.file_type; + result.mimetype = info.mimetype; + result.document_meta->document_type = info.document_type; break; } } diff --git a/src/odr/internal/ooxml/ooxml_meta.hpp b/src/odr/internal/ooxml/ooxml_meta.hpp index 8f6f4bd54..979c74f05 100644 --- a/src/odr/internal/ooxml/ooxml_meta.hpp +++ b/src/odr/internal/ooxml/ooxml_meta.hpp @@ -10,6 +10,6 @@ class ReadableFilesystem; namespace odr::internal::ooxml { -FileMeta parse_file_meta(abstract::ReadableFilesystem &filesystem); +FileMeta parse_file_meta(const abstract::ReadableFilesystem &filesystem); } // namespace odr::internal::ooxml diff --git a/src/odr/internal/pdf/pdf_file.cpp b/src/odr/internal/pdf/pdf_file.cpp index 44eb8572e..2cb111af1 100644 --- a/src/odr/internal/pdf/pdf_file.cpp +++ b/src/odr/internal/pdf/pdf_file.cpp @@ -17,6 +17,10 @@ DecoderEngine PdfFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +std::string_view PdfFile::mimetype() const noexcept { + return "application/pdf"; +} + bool PdfFile::password_encrypted() const noexcept { return false; } EncryptionState PdfFile::encryption_state() const noexcept { diff --git a/src/odr/internal/pdf/pdf_file.hpp b/src/odr/internal/pdf/pdf_file.hpp index 02d4d77dc..1f596d5f3 100644 --- a/src/odr/internal/pdf/pdf_file.hpp +++ b/src/odr/internal/pdf/pdf_file.hpp @@ -12,6 +12,7 @@ class PdfFile final : public abstract::PdfFile { [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] bool password_encrypted() const noexcept override; [[nodiscard]] EncryptionState encryption_state() const noexcept override; diff --git a/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp b/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp index 51c8c957a..4129ea5be 100644 --- a/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp +++ b/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp @@ -53,6 +53,7 @@ void PopplerPdfFile::open(const std::optional &password) { } m_file_meta.type = FileType::portable_document_format; + m_file_meta.mimetype = "application/pdf"; m_file_meta.password_encrypted = m_pdf_doc->isEncrypted(); m_file_meta.document_meta.emplace(); m_file_meta.document_meta->document_type = DocumentType::text; @@ -71,6 +72,10 @@ DecoderEngine PopplerPdfFile::decoder_engine() const noexcept { return DecoderEngine::poppler; } +std::string_view PopplerPdfFile::mimetype() const noexcept { + return "application/pdf"; +} + bool PopplerPdfFile::password_encrypted() const noexcept { return m_encryption_state == EncryptionState::encrypted || m_encryption_state == EncryptionState::decrypted; diff --git a/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp b/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp index c30163578..8e0a9c193 100644 --- a/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp +++ b/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp @@ -17,6 +17,7 @@ class PopplerPdfFile final : public abstract::PdfFile { [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] bool password_encrypted() const noexcept override; [[nodiscard]] EncryptionState encryption_state() const noexcept override; diff --git a/src/odr/internal/svm/svm_file.cpp b/src/odr/internal/svm/svm_file.cpp index c82b4e727..770e03edd 100644 --- a/src/odr/internal/svm/svm_file.cpp +++ b/src/odr/internal/svm/svm_file.cpp @@ -28,15 +28,18 @@ FileType SvmFile::file_type() const noexcept { } FileMeta SvmFile::file_meta() const noexcept { - FileMeta result; - result.type = FileType::starview_metafile; - return result; + return {FileType::starview_metafile, "application/x-starview-metafile", false, + std::nullopt}; } DecoderEngine SvmFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +std::string_view SvmFile::mimetype() const noexcept { + return "application/x-starview-metafile"; +} + bool SvmFile::is_decodable() const noexcept { return false; } std::shared_ptr SvmFile::image() const { diff --git a/src/odr/internal/svm/svm_file.hpp b/src/odr/internal/svm/svm_file.hpp index 6d033c00c..be2a74e71 100644 --- a/src/odr/internal/svm/svm_file.hpp +++ b/src/odr/internal/svm/svm_file.hpp @@ -20,6 +20,7 @@ class SvmFile final : public abstract::ImageFile { [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/internal/text/text_file.cpp b/src/odr/internal/text/text_file.cpp index 433379830..fe529a269 100644 --- a/src/odr/internal/text/text_file.cpp +++ b/src/odr/internal/text/text_file.cpp @@ -18,14 +18,16 @@ std::shared_ptr TextFile::file() const noexcept { FileType TextFile::file_type() const noexcept { return FileType::text_file; } -FileMeta TextFile::file_meta() const noexcept { - return {FileType::text_file, false, {}}; -} - DecoderEngine TextFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +std::string_view TextFile::mimetype() const noexcept { return "text/plain"; } + +FileMeta TextFile::file_meta() const noexcept { + return {FileType::text_file, "text/plain", false, std::nullopt}; +} + bool TextFile::is_decodable() const noexcept { return false; } } // namespace odr::internal::text diff --git a/src/odr/internal/text/text_file.hpp b/src/odr/internal/text/text_file.hpp index 7361dec50..ddf38dde6 100644 --- a/src/odr/internal/text/text_file.hpp +++ b/src/odr/internal/text/text_file.hpp @@ -18,6 +18,7 @@ class TextFile final : public abstract::TextFile { [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/internal/zip/zip_file.cpp b/src/odr/internal/zip/zip_file.cpp index dc0f82d1f..5ae18671b 100644 --- a/src/odr/internal/zip/zip_file.cpp +++ b/src/odr/internal/zip/zip_file.cpp @@ -18,15 +18,17 @@ std::shared_ptr ZipFile::file() const noexcept { FileType ZipFile::file_type() const noexcept { return FileType::zip; } FileMeta ZipFile::file_meta() const noexcept { - FileMeta meta; - meta.type = file_type(); - return meta; + return {FileType::zip, "application/zip", false, std::nullopt}; } DecoderEngine ZipFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +std::string_view ZipFile::mimetype() const noexcept { + return "application/zip"; +} + bool ZipFile::is_decodable() const noexcept { return true; } std::shared_ptr ZipFile::archive() const { diff --git a/src/odr/internal/zip/zip_file.hpp b/src/odr/internal/zip/zip_file.hpp index 5755d3cdc..ec87eaa06 100644 --- a/src/odr/internal/zip/zip_file.hpp +++ b/src/odr/internal/zip/zip_file.hpp @@ -27,6 +27,7 @@ class ZipFile final : public abstract::ArchiveFile { [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] std::string_view mimetype() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/odr.cpp b/src/odr/odr.cpp index d7bca15d4..b3c679345 100644 --- a/src/odr/odr.cpp +++ b/src/odr/odr.cpp @@ -252,6 +252,143 @@ std::string odr::document_type_to_string(const DocumentType type) noexcept { } } +odr::FileType +odr::file_type_by_mimetype(const std::string_view mimetype) noexcept { + if (mimetype == "application/vnd.oasis.opendocument.text") { + return FileType::opendocument_text; + } + if (mimetype == "application/vnd.oasis.opendocument.presentation") { + return FileType::opendocument_presentation; + } + if (mimetype == "application/vnd.oasis.opendocument.spreadsheet") { + return FileType::opendocument_spreadsheet; + } + if (mimetype == "application/vnd.oasis.opendocument.graphics") { + return FileType::opendocument_graphics; + } + if (mimetype == + "application/" + "vnd.openxmlformats-officedocument.wordprocessingml.document") { + return FileType::office_open_xml_document; + } + if (mimetype == + "application/" + "vnd.openxmlformats-officedocument.presentationml.presentation") { + return FileType::office_open_xml_presentation; + } + if (mimetype == + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") { + return FileType::office_open_xml_workbook; + } + if (mimetype == "application/msword") { + return FileType::legacy_word_document; + } + if (mimetype == "application/vnd.ms-powerpoint") { + return FileType::legacy_powerpoint_presentation; + } + if (mimetype == "application/vnd.ms-excel") { + return FileType::legacy_excel_worksheets; + } + if (mimetype == "application/zip" || + mimetype == "application/x-zip-compressed") { + return FileType::zip; + } + if (mimetype == "application/pdf") { + return FileType::portable_document_format; + } + if (mimetype == "text/plain") { + return FileType::text_file; + } + if (mimetype == "text/csv") { + return FileType::comma_separated_values; + } + if (mimetype == "application/json") { + return FileType::javascript_object_notation; + } + if (mimetype == "text/markdown") { + return FileType::markdown; + } + if (mimetype == "image/png") { + return FileType::portable_network_graphics; + } + if (mimetype == "image/gif") { + return FileType::graphics_interchange_format; + } + if (mimetype == "image/jpeg") { + return FileType::jpeg; + } + if (mimetype == "image/bmp") { + return FileType::bitmap_image_file; + } + return FileType::unknown; +} + +std::string_view odr::mimetype_by_file_type(const FileType type) { + if (type == FileType::opendocument_text) { + return "application/vnd.oasis.opendocument.text"; + } + if (type == FileType::opendocument_presentation) { + return "application/vnd.oasis.opendocument.presentation"; + } + if (type == FileType::opendocument_spreadsheet) { + return "application/vnd.oasis.opendocument.spreadsheet"; + } + if (type == FileType::opendocument_graphics) { + return "application/vnd.oasis.opendocument.graphics"; + } + if (type == FileType::office_open_xml_document) { + return "application/" + "vnd.openxmlformats-officedocument.wordprocessingml.document"; + } + if (type == FileType::office_open_xml_presentation) { + return "application/" + "vnd.openxmlformats-officedocument.presentationml.presentation"; + } + if (type == FileType::office_open_xml_workbook) { + return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"; + } + if (type == FileType::legacy_word_document) { + return "application/msword"; + } + if (type == FileType::legacy_powerpoint_presentation) { + return "application/vnd.ms-powerpoint"; + } + if (type == FileType::legacy_excel_worksheets) { + return "application/vnd.ms-excel"; + } + if (type == FileType::zip) { + return "application/zip"; + } + if (type == FileType::portable_document_format) { + return "application/pdf"; + } + if (type == FileType::text_file) { + return "text/plain"; + } + if (type == FileType::comma_separated_values) { + return "text/csv"; + } + if (type == FileType::javascript_object_notation) { + return "application/json"; + } + if (type == FileType::markdown) { + return "text/markdown"; + } + if (type == FileType::portable_network_graphics) { + return "image/png"; + } + if (type == FileType::graphics_interchange_format) { + return "image/gif"; + } + if (type == FileType::jpeg) { + return "image/jpeg"; + } + if (type == FileType::bitmap_image_file) { + return "image/bmp"; + } + throw UnsupportedFileType(type); +} + std::string odr::decoder_engine_to_string(const DecoderEngine engine) { if (engine == DecoderEngine::odr) { return "odr"; diff --git a/src/odr/odr.hpp b/src/odr/odr.hpp index c5a8b5260..bfd8c094a 100644 --- a/src/odr/odr.hpp +++ b/src/odr/odr.hpp @@ -54,6 +54,15 @@ file_type_by_file_extension(const std::string &extension) noexcept; /// @param type The file type. /// @return The file type as a string. [[nodiscard]] std::string document_type_to_string(DocumentType type) noexcept; +/// @brief Get the file type by the MIME type. +/// @param mimetype The MIME type. +/// @return The file type. +[[nodiscard]] FileType +file_type_by_mimetype(std::string_view mimetype) noexcept; +/// @brief Get MIME type by the file type. +/// @param type The file type. +/// @return The MIME type. +[[nodiscard]] std::string_view mimetype_by_file_type(FileType type); /// @brief Get the decoder engine as a string. /// @param engine The decoder engine. @@ -74,6 +83,12 @@ list_file_types(const std::string &path, Logger &logger = Logger::null()); /// @param as The file type. /// @return The decoder engines. [[nodiscard]] std::vector list_decoder_engines(FileType as); +/// @brief Determine MIME types by the file path. +/// @param path The file path. +/// @param logger The logger to use. +/// @return The MIME types. +[[nodiscard]] std::string_view mimetype(const std::string &path, + Logger &logger = Logger::null()); /// @brief Open a file. /// @param path The file path. From 0b17685e5e1cdbb4020600a05670c0eb8c87dace Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sun, 19 Oct 2025 15:11:08 +0200 Subject: [PATCH 3/8] fix; cleanup --- src/odr/internal/abstract/file.hpp | 7 +++++-- src/odr/internal/cfb/cfb_file.cpp | 17 ++++++++--------- src/odr/internal/cfb/cfb_file.hpp | 4 ++-- src/odr/internal/csv/csv_file.cpp | 10 +++++----- src/odr/internal/csv/csv_file.hpp | 4 ++-- src/odr/internal/json/json_file.cpp | 17 ++++++++--------- src/odr/internal/json/json_file.hpp | 4 ++-- src/odr/internal/libmagic/libmagic.cpp | 2 ++ src/odr/internal/odf/odf_file.cpp | 12 ++++++------ src/odr/internal/odf/odf_file.hpp | 4 ++-- src/odr/internal/oldms/oldms_file.cpp | 12 ++++++------ src/odr/internal/oldms/oldms_file.hpp | 4 ++-- .../internal/oldms_wvware/wvware_oldms_file.cpp | 16 ++++++++-------- .../internal/oldms_wvware/wvware_oldms_file.hpp | 4 ++-- src/odr/internal/ooxml/ooxml_file.cpp | 12 ++++++------ src/odr/internal/ooxml/ooxml_file.hpp | 4 ++-- src/odr/internal/pdf/pdf_file.cpp | 6 +----- src/odr/internal/pdf/pdf_file.hpp | 3 +-- .../internal/pdf_poppler/poppler_pdf_file.cpp | 6 +----- .../internal/pdf_poppler/poppler_pdf_file.hpp | 3 +-- src/odr/internal/svm/svm_file.cpp | 17 ++++++++--------- src/odr/internal/svm/svm_file.hpp | 4 ++-- src/odr/internal/text/text_file.cpp | 6 +++--- src/odr/internal/text/text_file.hpp | 4 ++-- src/odr/internal/zip/zip_file.cpp | 12 ++++++------ src/odr/internal/zip/zip_file.hpp | 4 ++-- 26 files changed, 95 insertions(+), 103 deletions(-) diff --git a/src/odr/internal/abstract/file.hpp b/src/odr/internal/abstract/file.hpp index b41eb209f..1720cd7e4 100644 --- a/src/odr/internal/abstract/file.hpp +++ b/src/odr/internal/abstract/file.hpp @@ -34,11 +34,11 @@ class DecodedFile { [[nodiscard]] virtual std::shared_ptr file() const noexcept = 0; + [[nodiscard]] virtual DecoderEngine decoder_engine() const noexcept = 0; [[nodiscard]] virtual FileType file_type() const noexcept = 0; [[nodiscard]] virtual FileCategory file_category() const noexcept = 0; - [[nodiscard]] virtual FileMeta file_meta() const noexcept = 0; - [[nodiscard]] virtual DecoderEngine decoder_engine() const noexcept = 0; [[nodiscard]] virtual std::string_view mimetype() const noexcept = 0; + [[nodiscard]] virtual FileMeta file_meta() const noexcept = 0; [[nodiscard]] virtual bool password_encrypted() const noexcept { return false; @@ -100,6 +100,9 @@ class PdfFile : public DecodedFile { [[nodiscard]] FileCategory file_category() const noexcept final { return FileCategory::document; } + [[nodiscard]] std::string_view mimetype() const noexcept final { + return "application/pdf"; + } }; } // namespace odr::internal::abstract diff --git a/src/odr/internal/cfb/cfb_file.cpp b/src/odr/internal/cfb/cfb_file.cpp index 1bd6af703..49759f431 100644 --- a/src/odr/internal/cfb/cfb_file.cpp +++ b/src/odr/internal/cfb/cfb_file.cpp @@ -12,23 +12,22 @@ std::shared_ptr CfbFile::file() const noexcept { return m_cfb->file(); } -FileType CfbFile::file_type() const noexcept { - return FileType::compound_file_binary_format; -} - -FileMeta CfbFile::file_meta() const noexcept { - return {FileType::compound_file_binary_format, "application/x-cfb", false, - std::nullopt}; -} - DecoderEngine CfbFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +FileType CfbFile::file_type() const noexcept { + return FileType::compound_file_binary_format; +} + std::string_view CfbFile::mimetype() const noexcept { return "application/x-cfb"; } +FileMeta CfbFile::file_meta() const noexcept { + return {file_type(), mimetype(), false, std::nullopt}; +} + bool CfbFile::is_decodable() const noexcept { return true; } std::shared_ptr CfbFile::archive() const { diff --git a/src/odr/internal/cfb/cfb_file.hpp b/src/odr/internal/cfb/cfb_file.hpp index a7dd129a0..ac2520b25 100644 --- a/src/odr/internal/cfb/cfb_file.hpp +++ b/src/odr/internal/cfb/cfb_file.hpp @@ -23,10 +23,10 @@ class CfbFile final : public abstract::ArchiveFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileType file_type() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/internal/csv/csv_file.cpp b/src/odr/internal/csv/csv_file.cpp index 19b63d062..4d6f9b909 100644 --- a/src/odr/internal/csv/csv_file.cpp +++ b/src/odr/internal/csv/csv_file.cpp @@ -16,18 +16,18 @@ std::shared_ptr CsvFile::file() const noexcept { return m_file->file(); } -FileType CsvFile::file_type() const noexcept { - return FileType::comma_separated_values; -} - DecoderEngine CsvFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +FileType CsvFile::file_type() const noexcept { + return FileType::comma_separated_values; +} + std::string_view CsvFile::mimetype() const noexcept { return "text/csv"; } FileMeta CsvFile::file_meta() const noexcept { - return {FileType::comma_separated_values, "text/csv", false, std::nullopt}; + return {file_type(), mimetype(), false, std::nullopt}; } bool CsvFile::is_decodable() const noexcept { return false; } diff --git a/src/odr/internal/csv/csv_file.hpp b/src/odr/internal/csv/csv_file.hpp index 315471028..100c664a6 100644 --- a/src/odr/internal/csv/csv_file.hpp +++ b/src/odr/internal/csv/csv_file.hpp @@ -14,10 +14,10 @@ class CsvFile final : public abstract::TextFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileType file_type() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/internal/json/json_file.cpp b/src/odr/internal/json/json_file.cpp index 78df9d119..d5fecd724 100644 --- a/src/odr/internal/json/json_file.cpp +++ b/src/odr/internal/json/json_file.cpp @@ -16,23 +16,22 @@ std::shared_ptr JsonFile::file() const noexcept { return m_file->file(); } -FileType JsonFile::file_type() const noexcept { - return FileType::javascript_object_notation; -} - -FileMeta JsonFile::file_meta() const noexcept { - return {FileType::javascript_object_notation, "application/json", false, - std::nullopt}; -} - DecoderEngine JsonFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +FileType JsonFile::file_type() const noexcept { + return FileType::javascript_object_notation; +} + std::string_view JsonFile::mimetype() const noexcept { return "application/json"; } +FileMeta JsonFile::file_meta() const noexcept { + return {file_type(), mimetype(), false, std::nullopt}; +} + bool JsonFile::is_decodable() const noexcept { return false; } } // namespace odr::internal::json diff --git a/src/odr/internal/json/json_file.hpp b/src/odr/internal/json/json_file.hpp index db36bcbd8..22fb5c9e2 100644 --- a/src/odr/internal/json/json_file.hpp +++ b/src/odr/internal/json/json_file.hpp @@ -14,10 +14,10 @@ class JsonFile final : public abstract::TextFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileType file_type() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/internal/libmagic/libmagic.cpp b/src/odr/internal/libmagic/libmagic.cpp index ff9bb8b7f..4f826f3ff 100644 --- a/src/odr/internal/libmagic/libmagic.cpp +++ b/src/odr/internal/libmagic/libmagic.cpp @@ -2,6 +2,8 @@ #include +#include + #include namespace odr::internal { diff --git a/src/odr/internal/odf/odf_file.cpp b/src/odr/internal/odf/odf_file.cpp index 5d41a3992..1701a7007 100644 --- a/src/odr/internal/odf/odf_file.cpp +++ b/src/odr/internal/odf/odf_file.cpp @@ -36,20 +36,20 @@ std::shared_ptr OpenDocumentFile::file() const noexcept { return {}; } -FileType OpenDocumentFile::file_type() const noexcept { - return m_file_meta.type; -} - -FileMeta OpenDocumentFile::file_meta() const noexcept { return m_file_meta; } - DecoderEngine OpenDocumentFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +FileType OpenDocumentFile::file_type() const noexcept { + return m_file_meta.type; +} + std::string_view OpenDocumentFile::mimetype() const noexcept { return m_file_meta.mimetype; } +FileMeta OpenDocumentFile::file_meta() const noexcept { return m_file_meta; } + DocumentType OpenDocumentFile::document_type() const { return m_file_meta.document_meta.value().document_type; } diff --git a/src/odr/internal/odf/odf_file.hpp b/src/odr/internal/odf/odf_file.hpp index 50a893483..77a6e94c2 100644 --- a/src/odr/internal/odf/odf_file.hpp +++ b/src/odr/internal/odf/odf_file.hpp @@ -25,10 +25,10 @@ class OpenDocumentFile final : public virtual abstract::DocumentFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileType file_type() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DocumentType document_type() const override; [[nodiscard]] DocumentMeta document_meta() const override; diff --git a/src/odr/internal/oldms/oldms_file.cpp b/src/odr/internal/oldms/oldms_file.cpp index 759d2451e..ce11b0cfa 100644 --- a/src/odr/internal/oldms/oldms_file.cpp +++ b/src/odr/internal/oldms/oldms_file.cpp @@ -60,20 +60,20 @@ std::shared_ptr LegacyMicrosoftFile::file() const noexcept { return {}; } -FileType LegacyMicrosoftFile::file_type() const noexcept { - return m_file_meta.type; -} - -FileMeta LegacyMicrosoftFile::file_meta() const noexcept { return m_file_meta; } - DecoderEngine LegacyMicrosoftFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +FileType LegacyMicrosoftFile::file_type() const noexcept { + return m_file_meta.type; +} + std::string_view LegacyMicrosoftFile::mimetype() const noexcept { return m_file_meta.mimetype; } +FileMeta LegacyMicrosoftFile::file_meta() const noexcept { return m_file_meta; } + DocumentType LegacyMicrosoftFile::document_type() const { return m_file_meta.document_meta.value().document_type; } diff --git a/src/odr/internal/oldms/oldms_file.hpp b/src/odr/internal/oldms/oldms_file.hpp index a9686ba83..4aa9aacf1 100644 --- a/src/odr/internal/oldms/oldms_file.hpp +++ b/src/odr/internal/oldms/oldms_file.hpp @@ -22,10 +22,10 @@ class LegacyMicrosoftFile final : public abstract::DocumentFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileType file_type() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DocumentType document_type() const override; [[nodiscard]] DocumentMeta document_meta() const override; diff --git a/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp b/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp index 289c981ce..34e1f6818 100644 --- a/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp +++ b/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp @@ -83,22 +83,22 @@ WvWareLegacyMicrosoftFile::file() const noexcept { return m_file; } -FileType WvWareLegacyMicrosoftFile::file_type() const noexcept { - return FileType::legacy_word_document; -} - -FileMeta WvWareLegacyMicrosoftFile::file_meta() const noexcept { - return m_file_meta; -} - DecoderEngine WvWareLegacyMicrosoftFile::decoder_engine() const noexcept { return DecoderEngine::wvware; } +FileType WvWareLegacyMicrosoftFile::file_type() const noexcept { + return FileType::legacy_word_document; +} + std::string_view WvWareLegacyMicrosoftFile::mimetype() const noexcept { return "application/msword"; } +FileMeta WvWareLegacyMicrosoftFile::file_meta() const noexcept { + return m_file_meta; +} + DocumentType WvWareLegacyMicrosoftFile::document_type() const { return DocumentType::text; } diff --git a/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp b/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp index 2510f70d0..b7b4d6150 100644 --- a/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp +++ b/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp @@ -23,10 +23,10 @@ class WvWareLegacyMicrosoftFile final : public abstract::DocumentFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileType file_type() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DocumentType document_type() const override; [[nodiscard]] DocumentMeta document_meta() const override; diff --git a/src/odr/internal/ooxml/ooxml_file.cpp b/src/odr/internal/ooxml/ooxml_file.cpp index eaa0bfff4..4db646513 100644 --- a/src/odr/internal/ooxml/ooxml_file.cpp +++ b/src/odr/internal/ooxml/ooxml_file.cpp @@ -29,20 +29,20 @@ std::shared_ptr OfficeOpenXmlFile::file() const noexcept { return {}; } -FileType OfficeOpenXmlFile::file_type() const noexcept { - return m_file_meta.type; -} - -FileMeta OfficeOpenXmlFile::file_meta() const noexcept { return m_file_meta; } - DecoderEngine OfficeOpenXmlFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +FileType OfficeOpenXmlFile::file_type() const noexcept { + return m_file_meta.type; +} + std::string_view OfficeOpenXmlFile::mimetype() const noexcept { return m_file_meta.mimetype; } +FileMeta OfficeOpenXmlFile::file_meta() const noexcept { return m_file_meta; } + DocumentType OfficeOpenXmlFile::document_type() const { return m_file_meta.document_meta.value().document_type; } diff --git a/src/odr/internal/ooxml/ooxml_file.hpp b/src/odr/internal/ooxml/ooxml_file.hpp index 4355386de..2b88b294a 100644 --- a/src/odr/internal/ooxml/ooxml_file.hpp +++ b/src/odr/internal/ooxml/ooxml_file.hpp @@ -24,10 +24,10 @@ class OfficeOpenXmlFile final : public abstract::DocumentFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileType file_type() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DocumentType document_type() const override; [[nodiscard]] DocumentMeta document_meta() const override; diff --git a/src/odr/internal/pdf/pdf_file.cpp b/src/odr/internal/pdf/pdf_file.cpp index 2cb111af1..b5a0ba9ad 100644 --- a/src/odr/internal/pdf/pdf_file.cpp +++ b/src/odr/internal/pdf/pdf_file.cpp @@ -11,15 +11,11 @@ std::shared_ptr PdfFile::file() const noexcept { return m_file; } -FileMeta PdfFile::file_meta() const noexcept { return m_file_meta; } - DecoderEngine PdfFile::decoder_engine() const noexcept { return DecoderEngine::odr; } -std::string_view PdfFile::mimetype() const noexcept { - return "application/pdf"; -} +FileMeta PdfFile::file_meta() const noexcept { return m_file_meta; } bool PdfFile::password_encrypted() const noexcept { return false; } diff --git a/src/odr/internal/pdf/pdf_file.hpp b/src/odr/internal/pdf/pdf_file.hpp index 1f596d5f3..de858569a 100644 --- a/src/odr/internal/pdf/pdf_file.hpp +++ b/src/odr/internal/pdf/pdf_file.hpp @@ -10,9 +10,8 @@ class PdfFile final : public abstract::PdfFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; - [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] bool password_encrypted() const noexcept override; [[nodiscard]] EncryptionState encryption_state() const noexcept override; diff --git a/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp b/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp index 4129ea5be..54b40208f 100644 --- a/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp +++ b/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp @@ -66,15 +66,11 @@ std::shared_ptr PopplerPdfFile::file() const noexcept { return m_file; } -FileMeta PopplerPdfFile::file_meta() const noexcept { return m_file_meta; } - DecoderEngine PopplerPdfFile::decoder_engine() const noexcept { return DecoderEngine::poppler; } -std::string_view PopplerPdfFile::mimetype() const noexcept { - return "application/pdf"; -} +FileMeta PopplerPdfFile::file_meta() const noexcept { return m_file_meta; } bool PopplerPdfFile::password_encrypted() const noexcept { return m_encryption_state == EncryptionState::encrypted || diff --git a/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp b/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp index 8e0a9c193..40367fcf4 100644 --- a/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp +++ b/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp @@ -15,9 +15,8 @@ class PopplerPdfFile final : public abstract::PdfFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; - [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] bool password_encrypted() const noexcept override; [[nodiscard]] EncryptionState encryption_state() const noexcept override; diff --git a/src/odr/internal/svm/svm_file.cpp b/src/odr/internal/svm/svm_file.cpp index 770e03edd..fba4ab14b 100644 --- a/src/odr/internal/svm/svm_file.cpp +++ b/src/odr/internal/svm/svm_file.cpp @@ -23,23 +23,22 @@ std::shared_ptr SvmFile::file() const noexcept { return m_file; } -FileType SvmFile::file_type() const noexcept { - return FileType::starview_metafile; -} - -FileMeta SvmFile::file_meta() const noexcept { - return {FileType::starview_metafile, "application/x-starview-metafile", false, - std::nullopt}; -} - DecoderEngine SvmFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +FileType SvmFile::file_type() const noexcept { + return FileType::starview_metafile; +} + std::string_view SvmFile::mimetype() const noexcept { return "application/x-starview-metafile"; } +FileMeta SvmFile::file_meta() const noexcept { + return {file_type(), mimetype(), false, std::nullopt}; +} + bool SvmFile::is_decodable() const noexcept { return false; } std::shared_ptr SvmFile::image() const { diff --git a/src/odr/internal/svm/svm_file.hpp b/src/odr/internal/svm/svm_file.hpp index be2a74e71..b6e1c7c10 100644 --- a/src/odr/internal/svm/svm_file.hpp +++ b/src/odr/internal/svm/svm_file.hpp @@ -17,10 +17,10 @@ class SvmFile final : public abstract::ImageFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileType file_type() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/internal/text/text_file.cpp b/src/odr/internal/text/text_file.cpp index fe529a269..c8f32c02b 100644 --- a/src/odr/internal/text/text_file.cpp +++ b/src/odr/internal/text/text_file.cpp @@ -16,16 +16,16 @@ std::shared_ptr TextFile::file() const noexcept { return m_file; } -FileType TextFile::file_type() const noexcept { return FileType::text_file; } - DecoderEngine TextFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +FileType TextFile::file_type() const noexcept { return FileType::text_file; } + std::string_view TextFile::mimetype() const noexcept { return "text/plain"; } FileMeta TextFile::file_meta() const noexcept { - return {FileType::text_file, "text/plain", false, std::nullopt}; + return {file_type(), mimetype(), false, std::nullopt}; } bool TextFile::is_decodable() const noexcept { return false; } diff --git a/src/odr/internal/text/text_file.hpp b/src/odr/internal/text/text_file.hpp index ddf38dde6..bcf21324c 100644 --- a/src/odr/internal/text/text_file.hpp +++ b/src/odr/internal/text/text_file.hpp @@ -15,10 +15,10 @@ class TextFile final : public abstract::TextFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileType file_type() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; diff --git a/src/odr/internal/zip/zip_file.cpp b/src/odr/internal/zip/zip_file.cpp index 5ae18671b..16bc64123 100644 --- a/src/odr/internal/zip/zip_file.cpp +++ b/src/odr/internal/zip/zip_file.cpp @@ -15,20 +15,20 @@ std::shared_ptr ZipFile::file() const noexcept { return m_zip->file(); } -FileType ZipFile::file_type() const noexcept { return FileType::zip; } - -FileMeta ZipFile::file_meta() const noexcept { - return {FileType::zip, "application/zip", false, std::nullopt}; -} - DecoderEngine ZipFile::decoder_engine() const noexcept { return DecoderEngine::odr; } +FileType ZipFile::file_type() const noexcept { return FileType::zip; } + std::string_view ZipFile::mimetype() const noexcept { return "application/zip"; } +FileMeta ZipFile::file_meta() const noexcept { + return {file_type(), mimetype(), false, std::nullopt}; +} + bool ZipFile::is_decodable() const noexcept { return true; } std::shared_ptr ZipFile::archive() const { diff --git a/src/odr/internal/zip/zip_file.hpp b/src/odr/internal/zip/zip_file.hpp index ec87eaa06..ba3c28ee5 100644 --- a/src/odr/internal/zip/zip_file.hpp +++ b/src/odr/internal/zip/zip_file.hpp @@ -24,10 +24,10 @@ class ZipFile final : public abstract::ArchiveFile { [[nodiscard]] std::shared_ptr file() const noexcept override; - [[nodiscard]] FileType file_type() const noexcept override; - [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] DecoderEngine decoder_engine() const noexcept override; + [[nodiscard]] FileType file_type() const noexcept override; [[nodiscard]] std::string_view mimetype() const noexcept override; + [[nodiscard]] FileMeta file_meta() const noexcept override; [[nodiscard]] bool is_decodable() const noexcept override; From f425a371e61fb0449f5263f36be2c14c8f9a4b21 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sun, 19 Oct 2025 15:28:48 +0200 Subject: [PATCH 4/8] update lock --- conan.lock | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/conan.lock b/conan.lock index ac2a66b8e..7c0ed6676 100644 --- a/conan.lock +++ b/conan.lock @@ -47,19 +47,23 @@ "argon2/20190702-odr#965901884bc82ec8a7c0a1305d42c127%1754427608.210625" ], "build_requires": [ + "zstd/1.5.7#fde461c0d847a22f16d3066774f61b11%1744114235.235", "zlib/1.3.1#b8bc2603263cf7eccbd6e17e66b0ed76%1733936244.862", + "xz_utils/5.4.5#b885d1d79c9d30cff3803f7f551dbe66%1724318972.064", "pkgconf/2.2.0#6462942a22803086372db44689ba825f%1713364853.749", "pkgconf/2.1.0#27f44583701117b571307cf5b5fe5605%1701537936.436", "pkgconf/2.0.3#f996677e96e61e6552d85e83756c328b%1696606182.229", "pcre2/10.42#9a35f5089feb875ec61a38eca364ce77%1743524593.693", "ninja/1.13.0#53ff096207a5599ced46a633271b3cef%1751046277.036", + "msys2/cci.latest#5b73b10144f73cc5bfe0572ed9be39e1%1751977009.857", "meson/1.4.0#2262941cc8fbb0099dd0c196ca2a6c01%1726730116.631", "meson/1.3.2#26ce8a76a36cc275cdfee1d757bc6561%1726730118.251", "meson/1.2.2#21b73818ba96d9eea465b310b5bbc993%1726730120.212", "meson/1.2.1#f2b0c7763308df8e33172744dace8845%1726730117.905", "m4/1.4.19#b38ced39a01e31fef5435bc634461fd2%1700758725.451", "libtool/2.4.7#a182d7ce8d4c346a19dbd4a5d532ef68%1742900203.747", - "libselinux/3.6#5a78ff6ae5034eeaac8da723361a8ce4%1748075177.52", + "libselinux/3.6#5a78ff6ae5034eeaac8da723361a8ce4%1717655459.344", + "libmagic/5.45#791d5bad38d33272bb120994a198b1ac%1727273086.09", "libiconv/1.17#1e65319e945f2d31941a9d28cc13c058%1751451666.321", "libgettext/0.22#35d2811b2dd27a98f69e4daa86ca2000%1714393058.647", "libffi/3.4.8#06926dca35bcf8e321fcc24def952cde%1748531860.405", @@ -69,7 +73,7 @@ "gnu-config/cci.20210814#dc430d754f465e8c74463019672fb97b%1701248168.479", "glib/2.81.0-odr#ddf445d5af468f972978af93c44d26e1%1754427608.367101", "gettext/0.22.5#a1f31cc77dee0345699745ef39686dd0%1750252839.982", - "flex/2.6.4#e35bc44b3fcbcd661e0af0dc5b5b1ad4%1748075168.656", + "flex/2.6.4#e35bc44b3fcbcd661e0af0dc5b5b1ad4%1674818991.113", "cmake/3.31.8#dd6e07c418afc4b30cb1c21584dccc49%1750223587.75", "bzip2/1.0.8#00b4a4658791c1f06914e087f0e792f5%1744702067.178", "automake/1.16.5#058bda3e21c36c9aa8425daf3c1faf50%1688481772.751", From f3fedfe37200d7b1e3db2f8be163c12e44452d40 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sun, 19 Oct 2025 15:34:11 +0200 Subject: [PATCH 5/8] cut out windows --- conanfile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/conanfile.py b/conanfile.py index 7669c2da8..40c453f18 100644 --- a/conanfile.py +++ b/conanfile.py @@ -36,6 +36,7 @@ def config_options(self): del self.options.fPIC del self.options.with_pdf2htmlEX del self.options.with_wvWare + del self.options.with_libmagic def requirements(self): self.requires("pugixml/1.14") From 40a6a4bd643fa29957c5a8b45b7c9f19c65a564b Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sun, 19 Oct 2025 16:02:19 +0200 Subject: [PATCH 6/8] test --- CMakeLists.txt | 1 + conanfile.py | 2 +- src/odr/global_params.cpp | 10 +++++----- src/odr/global_params.hpp | 6 +++--- src/odr/internal/libmagic/libmagic.cpp | 6 +++--- src/odr/internal/libmagic/libmagic.hpp | 2 +- src/odr/internal/magic.cpp | 4 ++-- src/odr/internal/project_info.cpp.in | 4 ++-- src/odr/internal/project_info.hpp | 2 +- test/src/internal/magic_test.cpp | 24 ++++++++++++++++++++++++ 10 files changed, 43 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a39152f9..5a4089b8e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,7 @@ option(ODR_CLI "enable command line interface" ON) option(ODR_CLANG_TIDY "Run clang-tidy static analysis" OFF) option(WITH_PDF2HTMLEX "Build with pdf2htmlEX" ON) option(WITH_WVWARE "Build with wvWare" ON) +option(WITH_LIBMAGIC "Build with libmagic" ON) # TODO defining global compiler flags seems to be bad practice with conan # TODO consider using conan profiles diff --git a/conanfile.py b/conanfile.py index 40c453f18..7f02890ed 100644 --- a/conanfile.py +++ b/conanfile.py @@ -82,7 +82,7 @@ def generate(self): for dep in deps: runenv_info.compose_env(dep.runenv_info) envvars = runenv_info.vars(self) - for v in ["PDF2HTMLEX_DATA_DIR", "POPPLER_DATA_DIR", "FONTCONFIG_PATH", "WVDATADIR"]: + for v in ["PDF2HTMLEX_DATA_DIR", "POPPLER_DATA_DIR", "FONTCONFIG_PATH", "WVDATADIR", "MAGIC"]: tc.variables[v] = envvars.get(v) tc.generate() diff --git a/src/odr/global_params.cpp b/src/odr/global_params.cpp index 5a9a3e52c..5a75bbbfb 100644 --- a/src/odr/global_params.cpp +++ b/src/odr/global_params.cpp @@ -41,8 +41,8 @@ const std::string &GlobalParams::pdf2htmlex_data_path() { return instance().m_pdf2htmlex_data_path; } -const std::string &GlobalParams::libmagic_data_path() { - return instance().m_libmagic_data_path; +const std::string &GlobalParams::libmagic_path() { + return instance().m_libmagic_path; } void GlobalParams::set_odr_core_data_path(const std::string &path) { @@ -66,8 +66,8 @@ void GlobalParams::set_pdf2htmlex_data_path(const std::string &path) { instance().m_pdf2htmlex_data_path = path; } -void GlobalParams::set_libmagic_data_path(const std::string &path) { - instance().m_libmagic_data_path = path; +void GlobalParams::set_libmagic_path(const std::string &path) { + instance().m_libmagic_path = path; } GlobalParams::GlobalParams() @@ -75,6 +75,6 @@ GlobalParams::GlobalParams() m_fontconfig_data_path{internal::project_info::fontconfig_data_path()}, m_poppler_data_path{internal::project_info::poppler_data_path()}, m_pdf2htmlex_data_path{internal::project_info::pdf2htmlex_data_path()}, - m_libmagic_data_path{internal::project_info::libmagic_data_path()} {} + m_libmagic_path{internal::project_info::libmagic_path()} {} } // namespace odr diff --git a/src/odr/global_params.hpp b/src/odr/global_params.hpp index 65810c712..26ee84f08 100644 --- a/src/odr/global_params.hpp +++ b/src/odr/global_params.hpp @@ -10,13 +10,13 @@ class GlobalParams { static const std::string &fontconfig_data_path(); static const std::string &poppler_data_path(); static const std::string &pdf2htmlex_data_path(); - static const std::string &libmagic_data_path(); + static const std::string &libmagic_path(); static void set_odr_core_data_path(const std::string &path); static void set_fontconfig_data_path(const std::string &path); static void set_poppler_data_path(const std::string &path); static void set_pdf2htmlex_data_path(const std::string &path); - static void set_libmagic_data_path(const std::string &path); + static void set_libmagic_path(const std::string &path); private: static GlobalParams &instance(); @@ -27,7 +27,7 @@ class GlobalParams { std::string m_fontconfig_data_path; std::string m_poppler_data_path; std::string m_pdf2htmlex_data_path; - std::string m_libmagic_data_path; + std::string m_libmagic_path; }; } // namespace odr diff --git a/src/odr/internal/libmagic/libmagic.cpp b/src/odr/internal/libmagic/libmagic.cpp index 4f826f3ff..18a0e1864 100644 --- a/src/odr/internal/libmagic/libmagic.cpp +++ b/src/odr/internal/libmagic/libmagic.cpp @@ -29,8 +29,8 @@ magic_t get_magic_cookie() { if (!magic_cookie) { throw std::runtime_error("magic_open failed"); } - if (magic_load(magic_cookie.get(), - GlobalParams::libmagic_data_path().c_str()) == 0) { + if (magic_load(magic_cookie.get(), GlobalParams::libmagic_path().c_str()) == + 0) { return magic_cookie.get(); } if (magic_load(magic_cookie.get(), nullptr) == 0) { @@ -42,7 +42,7 @@ magic_t get_magic_cookie() { } // namespace -const char *libmagic::mime_type(const std::string &path) { +const char *libmagic::mimetype(const std::string &path) { const magic_t magic_cookie = get_magic_cookie(); return magic_file(magic_cookie, path.c_str()); } diff --git a/src/odr/internal/libmagic/libmagic.hpp b/src/odr/internal/libmagic/libmagic.hpp index e628992fc..17f5a7c9a 100644 --- a/src/odr/internal/libmagic/libmagic.hpp +++ b/src/odr/internal/libmagic/libmagic.hpp @@ -3,5 +3,5 @@ #include namespace odr::internal::libmagic { -const char *mime_type(const std::string &path); +const char *mimetype(const std::string &path); } diff --git a/src/odr/internal/magic.cpp b/src/odr/internal/magic.cpp index e6853f275..c5df6d6d1 100644 --- a/src/odr/internal/magic.cpp +++ b/src/odr/internal/magic.cpp @@ -89,8 +89,8 @@ FileType magic::file_type(const File &file) { } std::string_view magic::mimetype(const std::string &path) { -#ifdef ODR_USE_LIBMAGIC - return libmagic::mime_type(path); +#ifdef ODR_WITH_LIBMAGIC + return libmagic::mimetype(path); #else return odr::mimetype_by_file_type(magic::file_type(path)); #endif diff --git a/src/odr/internal/project_info.cpp.in b/src/odr/internal/project_info.cpp.in index f06149abf..7dc722ce8 100644 --- a/src/odr/internal/project_info.cpp.in +++ b/src/odr/internal/project_info.cpp.in @@ -52,8 +52,8 @@ const char *project_info::pdf2htmlex_data_path() noexcept{ return "${PDF2HTMLEX_DATA_DIR}"; } -const char *project_info::libmagic_data_path() noexcept{ - return "${LIBMAGIC_DATA_DIR}"; +const char *project_info::libmagic_path() noexcept{ + return "${MAGIC}"; } } // namespace odr::internal diff --git a/src/odr/internal/project_info.hpp b/src/odr/internal/project_info.hpp index 8a672ea36..eabe8e545 100644 --- a/src/odr/internal/project_info.hpp +++ b/src/odr/internal/project_info.hpp @@ -13,6 +13,6 @@ const char *odr_data_path() noexcept; const char *fontconfig_data_path() noexcept; const char *poppler_data_path() noexcept; const char *pdf2htmlex_data_path() noexcept; -const char *libmagic_data_path() noexcept; +const char *libmagic_path() noexcept; } // namespace odr::internal::project_info diff --git a/test/src/internal/magic_test.cpp b/test/src/internal/magic_test.cpp index 6ff06fa5f..00607b67c 100644 --- a/test/src/internal/magic_test.cpp +++ b/test/src/internal/magic_test.cpp @@ -1,6 +1,7 @@ #include #include +#include #include @@ -13,26 +14,49 @@ using namespace odr::test; TEST(magic, odt) { const File file(TestData::test_file_path("odr-public/odt/about.odt")); EXPECT_EQ(magic::file_type(*file.impl()), FileType::zip); + + if (project_info::has_libmagic()) { + EXPECT_EQ(magic::mimetype(file.disk_path().value()), + "application/vnd.oasis.opendocument.text"); + } } TEST(magic, doc) { const File file(TestData::test_file_path("odr-public/doc/empty.doc")); EXPECT_EQ(magic::file_type(*file.impl()), FileType::compound_file_binary_format); + + if (project_info::has_libmagic()) { + EXPECT_EQ(magic::mimetype(file.disk_path().value()), "application/msword"); + } } TEST(magic, svm) { const File file(TestData::test_file_path("odr-public/svm/chart-1.svm")); EXPECT_EQ(magic::file_type(*file.impl()), FileType::starview_metafile); + + if (project_info::has_libmagic()) { + EXPECT_EQ(magic::mimetype(file.disk_path().value()), + "application/octet-stream"); + } } TEST(magic, odf) { const File file(TestData::test_file_path("odr-private/pdf/sample.pdf")); EXPECT_EQ(magic::file_type(*file.impl()), FileType::portable_document_format); + + if (project_info::has_libmagic()) { + EXPECT_EQ(magic::mimetype(file.disk_path().value()), "application/pdf"); + } } TEST(magic, wpd) { const File file( TestData::test_file_path("odr-public/wpd/Sync3 Sample Page.wpd")); EXPECT_EQ(magic::file_type(*file.impl()), FileType::word_perfect); + + if (project_info::has_libmagic()) { + EXPECT_EQ(magic::mimetype(file.disk_path().value()), + "application/vnd.wordperfect"); + } } From 7a64bddf904c15a55068269826c6d83a7402eb37 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sun, 19 Oct 2025 16:13:51 +0200 Subject: [PATCH 7/8] minor rename --- src/odr/global_params.cpp | 11 ++++++----- src/odr/global_params.hpp | 6 +++--- src/odr/internal/libmagic/libmagic.cpp | 4 ++-- src/odr/internal/project_info.cpp.in | 2 +- src/odr/internal/project_info.hpp | 2 +- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/odr/global_params.cpp b/src/odr/global_params.cpp index 5a75bbbfb..df9009e14 100644 --- a/src/odr/global_params.cpp +++ b/src/odr/global_params.cpp @@ -41,8 +41,8 @@ const std::string &GlobalParams::pdf2htmlex_data_path() { return instance().m_pdf2htmlex_data_path; } -const std::string &GlobalParams::libmagic_path() { - return instance().m_libmagic_path; +const std::string &GlobalParams::libmagic_database_path() { + return instance().m_libmagic_database_path; } void GlobalParams::set_odr_core_data_path(const std::string &path) { @@ -66,8 +66,8 @@ void GlobalParams::set_pdf2htmlex_data_path(const std::string &path) { instance().m_pdf2htmlex_data_path = path; } -void GlobalParams::set_libmagic_path(const std::string &path) { - instance().m_libmagic_path = path; +void GlobalParams::set_libmagic_database_path(const std::string &path) { + instance().m_libmagic_database_path = path; } GlobalParams::GlobalParams() @@ -75,6 +75,7 @@ GlobalParams::GlobalParams() m_fontconfig_data_path{internal::project_info::fontconfig_data_path()}, m_poppler_data_path{internal::project_info::poppler_data_path()}, m_pdf2htmlex_data_path{internal::project_info::pdf2htmlex_data_path()}, - m_libmagic_path{internal::project_info::libmagic_path()} {} + m_libmagic_database_path{ + internal::project_info::libmagic_database_path()} {} } // namespace odr diff --git a/src/odr/global_params.hpp b/src/odr/global_params.hpp index 26ee84f08..17e44dcb8 100644 --- a/src/odr/global_params.hpp +++ b/src/odr/global_params.hpp @@ -10,13 +10,13 @@ class GlobalParams { static const std::string &fontconfig_data_path(); static const std::string &poppler_data_path(); static const std::string &pdf2htmlex_data_path(); - static const std::string &libmagic_path(); + static const std::string &libmagic_database_path(); static void set_odr_core_data_path(const std::string &path); static void set_fontconfig_data_path(const std::string &path); static void set_poppler_data_path(const std::string &path); static void set_pdf2htmlex_data_path(const std::string &path); - static void set_libmagic_path(const std::string &path); + static void set_libmagic_database_path(const std::string &path); private: static GlobalParams &instance(); @@ -27,7 +27,7 @@ class GlobalParams { std::string m_fontconfig_data_path; std::string m_poppler_data_path; std::string m_pdf2htmlex_data_path; - std::string m_libmagic_path; + std::string m_libmagic_database_path; }; } // namespace odr diff --git a/src/odr/internal/libmagic/libmagic.cpp b/src/odr/internal/libmagic/libmagic.cpp index 18a0e1864..64ddbc0d1 100644 --- a/src/odr/internal/libmagic/libmagic.cpp +++ b/src/odr/internal/libmagic/libmagic.cpp @@ -29,8 +29,8 @@ magic_t get_magic_cookie() { if (!magic_cookie) { throw std::runtime_error("magic_open failed"); } - if (magic_load(magic_cookie.get(), GlobalParams::libmagic_path().c_str()) == - 0) { + if (magic_load(magic_cookie.get(), + GlobalParams::libmagic_database_path().c_str()) == 0) { return magic_cookie.get(); } if (magic_load(magic_cookie.get(), nullptr) == 0) { diff --git a/src/odr/internal/project_info.cpp.in b/src/odr/internal/project_info.cpp.in index 7dc722ce8..ca3ec8bd1 100644 --- a/src/odr/internal/project_info.cpp.in +++ b/src/odr/internal/project_info.cpp.in @@ -52,7 +52,7 @@ const char *project_info::pdf2htmlex_data_path() noexcept{ return "${PDF2HTMLEX_DATA_DIR}"; } -const char *project_info::libmagic_path() noexcept{ +const char *project_info::libmagic_database_path() noexcept{ return "${MAGIC}"; } diff --git a/src/odr/internal/project_info.hpp b/src/odr/internal/project_info.hpp index eabe8e545..e743ffc59 100644 --- a/src/odr/internal/project_info.hpp +++ b/src/odr/internal/project_info.hpp @@ -13,6 +13,6 @@ const char *odr_data_path() noexcept; const char *fontconfig_data_path() noexcept; const char *poppler_data_path() noexcept; const char *pdf2htmlex_data_path() noexcept; -const char *libmagic_path() noexcept; +const char *libmagic_database_path() noexcept; } // namespace odr::internal::project_info From b6669309912ee705f4a6dff7d55db3b47f3d062d Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sun, 19 Oct 2025 16:22:23 +0200 Subject: [PATCH 8/8] explicit env variables --- conanfile.py | 10 +++++----- src/odr/internal/project_info.cpp.in | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/conanfile.py b/conanfile.py index 7f02890ed..62b0c40cf 100644 --- a/conanfile.py +++ b/conanfile.py @@ -77,13 +77,13 @@ def generate(self): # Get runenv info, exported by package_info() of dependencies # We need to obtain PDF2HTMLEX_DATA_DIR, POPPLER_DATA_DIR, FONTCONFIG_PATH and WVDATADIR runenv_info = Environment() - deps = self.dependencies.host.topological_sort - deps = [dep for dep in reversed(deps.values())] - for dep in deps: + for dep in self.dependencies.host.topological_sort.values(): runenv_info.compose_env(dep.runenv_info) envvars = runenv_info.vars(self) - for v in ["PDF2HTMLEX_DATA_DIR", "POPPLER_DATA_DIR", "FONTCONFIG_PATH", "WVDATADIR", "MAGIC"]: - tc.variables[v] = envvars.get(v) + tc.variables["FONTCONFIG_DATA_PATH"] = envvars.get("FONTCONFIG_PATH") + tc.variables["POPPLER_DATA_PATH"] = envvars.get("POPPLER_DATA_DIR") + tc.variables["PDF2HTMLEX_DATA_PATH"] = envvars.get("PDF2HTMLEX_DATA_DIR") + tc.variables["LIBMAGIC_DATABASE_PATH"] = envvars.get("MAGIC") tc.generate() diff --git a/src/odr/internal/project_info.cpp.in b/src/odr/internal/project_info.cpp.in index ca3ec8bd1..ec761e147 100644 --- a/src/odr/internal/project_info.cpp.in +++ b/src/odr/internal/project_info.cpp.in @@ -41,19 +41,19 @@ const char *project_info::odr_data_path() noexcept { } const char *project_info::fontconfig_data_path() noexcept { - return "${FONTCONFIG_PATH}"; + return "${FONTCONFIG_DATA_PATH}"; } const char *project_info::poppler_data_path() noexcept { - return "${POPPLER_DATA_DIR}"; + return "${POPPLER_DATA_PATH}"; } const char *project_info::pdf2htmlex_data_path() noexcept{ - return "${PDF2HTMLEX_DATA_DIR}"; + return "${PDF2HTMLEX_DATA_PATH}"; } const char *project_info::libmagic_database_path() noexcept{ - return "${MAGIC}"; + return "${LIBMAGIC_DATABASE_PATH}"; } } // namespace odr::internal