summaryrefslogtreecommitdiffstats
path: root/magic/Magdir/msooxml
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--magic/Magdir/msooxml68
1 files changed, 68 insertions, 0 deletions
diff --git a/magic/Magdir/msooxml b/magic/Magdir/msooxml
new file mode 100644
index 0000000..905017e
--- /dev/null
+++ b/magic/Magdir/msooxml
@@ -0,0 +1,68 @@
+
+#------------------------------------------------------------------------------
+# $File: msooxml,v 1.19 2023/03/14 19:46:15 christos Exp $
+# msooxml: file(1) magic for Microsoft Office XML
+# From: Ralf Brown <ralf.brown@gmail.com>
+
+# .docx, .pptx, and .xlsx are XML plus other files inside a ZIP
+# archive. The first member file is normally "[Content_Types].xml".
+# but some libreoffice generated files put this later. Perhaps skip
+# the "[Content_Types].xml" test?
+# Since MSOOXML doesn't have anything like the uncompressed "mimetype"
+# file of ePub or OpenDocument, we'll have to scan for a filename
+# which can distinguish between the three types
+
+0 name msooxml
+>0 string word/ Microsoft Word 2007+
+!:mime application/vnd.openxmlformats-officedocument.wordprocessingml.document
+!:ext docx
+>0 string ppt/ Microsoft PowerPoint 2007+
+!:mime application/vnd.openxmlformats-officedocument.presentationml.presentation
+!:ext pptx
+>0 string xl/ Microsoft Excel 2007+
+!:mime application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
+!:ext xlsx
+>0 string visio/ Microsoft Visio 2013+
+!:mime application/vnd.ms-visio.drawing.main+xml
+>0 string AppManifest.xaml Microsoft Silverlight Application
+!:mime application/x-silverlight-app
+
+# start by checking for ZIP local file header signature
+0 string PK\003\004
+!:strength +10
+# make sure the first file is correct
+>0x1E use msooxml
+>0x1E default x
+>>0x1E regex \\[Content_Types\\]\\.xml|_rels/\\.rels|docProps|customXml
+# skip to the second local file header
+# since some documents include a 520-byte extra field following the file
+# header, we need to scan for the next header
+>>>(18.l+49) search/6000 PK\003\004
+# now skip to the *third* local file header; again, we need to scan due to a
+# 520-byte extra field following the file header
+>>>>&26 search/6000 PK\003\004
+# and check the subdirectory name to determine which type of OOXML
+# file we have. Correct the mimetype with the registered ones:
+# https://technet.microsoft.com/en-us/library/cc179224.aspx
+>>>>>&26 use msooxml
+>>>>>&26 default x
+# OpenOffice/Libreoffice orders ZIP entry differently, so check the 4th file
+>>>>>>&26 search/6000 PK\003\004
+>>>>>>>&26 use msooxml
+# Some OOXML generators add an extra customXml directory. Check another file.
+>>>>>>>&26 default x
+>>>>>>>>&26 search/6000 PK\003\004
+>>>>>>>>>&26 use msooxml
+>>>>>>>>>&26 default x Microsoft OOXML
+>>>>>>>&26 default x Microsoft OOXML
+>>>>>&26 default x Microsoft OOXML
+>>0x1E regex \\[trash\\]
+>>>&26 search/6000 PK\003\004
+>>>>&26 search/6000 PK\003\004
+>>>>>&26 use msooxml
+>>>>>&26 default x
+>>>>>>&26 search/6000 PK\003\004
+>>>>>>>&26 use msooxml
+>>>>>>>&26 default x Microsoft OOXML
+>>>>>>&26 default x Microsoft OOXML
+>>>>>&26 default x Microsoft OOXML