diff options
Diffstat (limited to 'magic/Magdir/msooxml')
-rw-r--r-- | magic/Magdir/msooxml | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/magic/Magdir/msooxml b/magic/Magdir/msooxml new file mode 100644 index 0000000..905017e --- /dev/null +++ b/magic/Magdir/msooxml @@ -0,0 +1,68 @@ + +#------------------------------------------------------------------------------ +# $File: msooxml,v 1.19 2023/03/14 19:46:15 christos Exp $ +# msooxml: file(1) magic for Microsoft Office XML +# From: Ralf Brown <ralf.brown@gmail.com> + +# .docx, .pptx, and .xlsx are XML plus other files inside a ZIP +# archive. The first member file is normally "[Content_Types].xml". +# but some libreoffice generated files put this later. Perhaps skip +# the "[Content_Types].xml" test? +# Since MSOOXML doesn't have anything like the uncompressed "mimetype" +# file of ePub or OpenDocument, we'll have to scan for a filename +# which can distinguish between the three types + +0 name msooxml +>0 string word/ Microsoft Word 2007+ +!:mime application/vnd.openxmlformats-officedocument.wordprocessingml.document +!:ext docx +>0 string ppt/ Microsoft PowerPoint 2007+ +!:mime application/vnd.openxmlformats-officedocument.presentationml.presentation +!:ext pptx +>0 string xl/ Microsoft Excel 2007+ +!:mime application/vnd.openxmlformats-officedocument.spreadsheetml.sheet +!:ext xlsx +>0 string visio/ Microsoft Visio 2013+ +!:mime application/vnd.ms-visio.drawing.main+xml +>0 string AppManifest.xaml Microsoft Silverlight Application +!:mime application/x-silverlight-app + +# start by checking for ZIP local file header signature +0 string PK\003\004 +!:strength +10 +# make sure the first file is correct +>0x1E use msooxml +>0x1E default x +>>0x1E regex \\[Content_Types\\]\\.xml|_rels/\\.rels|docProps|customXml +# skip to the second local file header +# since some documents include a 520-byte extra field following the file +# header, we need to scan for the next header +>>>(18.l+49) search/6000 PK\003\004 +# now skip to the *third* local file header; again, we need to scan due to a +# 520-byte extra field following the file header +>>>>&26 search/6000 PK\003\004 +# and check the subdirectory name to determine which type of OOXML +# file we have. Correct the mimetype with the registered ones: +# https://technet.microsoft.com/en-us/library/cc179224.aspx +>>>>>&26 use msooxml +>>>>>&26 default x +# OpenOffice/Libreoffice orders ZIP entry differently, so check the 4th file +>>>>>>&26 search/6000 PK\003\004 +>>>>>>>&26 use msooxml +# Some OOXML generators add an extra customXml directory. Check another file. +>>>>>>>&26 default x +>>>>>>>>&26 search/6000 PK\003\004 +>>>>>>>>>&26 use msooxml +>>>>>>>>>&26 default x Microsoft OOXML +>>>>>>>&26 default x Microsoft OOXML +>>>>>&26 default x Microsoft OOXML +>>0x1E regex \\[trash\\] +>>>&26 search/6000 PK\003\004 +>>>>&26 search/6000 PK\003\004 +>>>>>&26 use msooxml +>>>>>&26 default x +>>>>>>&26 search/6000 PK\003\004 +>>>>>>>&26 use msooxml +>>>>>>>&26 default x Microsoft OOXML +>>>>>>&26 default x Microsoft OOXML +>>>>>&26 default x Microsoft OOXML |