summaryrefslogtreecommitdiff
path: root/magic/Magdir/archive
diff options
context:
space:
mode:
Diffstat (limited to 'magic/Magdir/archive')
-rw-r--r--magic/Magdir/archive362
1 files changed, 297 insertions, 65 deletions
diff --git a/magic/Magdir/archive b/magic/Magdir/archive
index 4ef73a7..abecf71 100644
--- a/magic/Magdir/archive
+++ b/magic/Magdir/archive
@@ -1,5 +1,5 @@
#------------------------------------------------------------------------------
-# $File: archive,v 1.88 2014/08/16 10:42:17 christos Exp $
+# $File: archive,v 1.108 2017/08/30 13:45:10 christos Exp $
# archive: file(1) magic for archive formats (see also "msdos" for self-
# extracting compressed archives)
#
@@ -246,7 +246,15 @@
# BA
# TODO: idarc says "bytes 0-2 == bytes 3-5"
# TTComp
-0 string \0\6 TTComp archive data
+# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive
+# Update: Joerg Jenderek
+# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others
+0 string \0\6
+# look for first keyword of Panorama database *.pan
+>12 search/261 DESIGN
+# skip keyword with low entropy
+>12 default x TTComp archive, binary, 4K dictionary
+# (version 5.25) labeled the above entry as "TTComp archive data"
# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
0 string ESP ESP archive data
# ZPack
@@ -434,16 +442,34 @@
# AIN
0 string \x33\x18 AIN archive data
0 string \x33\x17 AIN archive data
-# XPA32
-0 string xpa\0\1 XPA32 archive data
+# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
# SZip (TODO: doesn't catch all versions)
0 string SZ\x0a\4 SZip archive data
# XPack DiskImage
-0 string jm XPack DiskImage archive data
+# *.XDI updated by Joerg Jenderek Sep 2015
+# ftp://ftp.sac.sk/pub/sac/pack/0index.txt
+# GRR: this test is still too general as it catches also text files starting with jm
+0 string jm
+# only found examples with this additional characteristic 2 bytes
+>2 string \x2\x4 Xpack DiskImage archive data
+#!:ext xdi
# XPack Data
-0 string xpa XPack archive data
+# *.xpa updated by Joerg Jenderek Sep 2015
+# ftp://ftp.elf.stuba.sk/pub/pc/pack/
+0 string xpa XPA
+!:ext xpa
+# XPA32
+# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
+# created by XPA32.EXE version 1.0.2 for Windows
+>0 string xpa\0\1 \b32 archive data
+# created by XPACK.COM version 1.67m or 1.67r with short 0x1800
+>3 ubeshort !0x0001 \bck archive data
# XPack Single Data
-0 string \xc3\x8d\ jm XPack single archive data
+# changed by Joerg Jenderek Sep 2015 back to like in version 5.12
+# letter 'I'+ acute accent is equivalent to \xcd
+0 string \xcd\ jm Xpack single archive data
+#!:mime application/x-xpa-compressed
+!:ext xpa
# TODO: missing due to unknown magic/magic at end of file:
#DWC
@@ -526,55 +552,212 @@
>>0x36 string >\0 fstype %.8s
# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
-2 string -lh0- LHarc 1.x/ARX archive data [lh0]
-!:mime application/x-lharc
-2 string -lh1- LHarc 1.x/ARX archive data [lh1]
-!:mime application/x-lharc
-2 string -lz4- LHarc 1.x archive data [lz4]
-!:mime application/x-lharc
-2 string -lz5- LHarc 1.x archive data [lz5]
-!:mime application/x-lharc
+# Update: Joerg Jenderek
+# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
+# Reference: http://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html
+#
+# check and display information of lharc (LHa,PMarc) file
+0 name lharc-file
+# check 1st character of method id like -lz4- -lh5- or -pm2-
+>2 string -
+# check 5th character of method id
+>>6 string -
+# check header level 0 1 2 3
+>>>20 ubyte <4
+# check 2nd, 3th and 4th character of method id
+>>>>3 regex \^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1) \b
+!:mime application/x-lzh-compressed
+# creator type "LHA "
+!:apple ????LHA
+# display archive type name like "LHa/LZS archive data" or "LArc archive"
+>>>>>2 string -lz \b
+!:ext lzs
+# already known -lzs- -lz4- -lz5- with old names
+>>>>>>2 string -lzs LHa/LZS archive data
+>>>>>>3 regex \^lz[45] LHarc 1.x archive data
+# missing -lz?- with wikipedia names
+>>>>>>3 regex \^lz[2378] LArc archive
+# display archive type name like "LHa (2.x) archive data"
+>>>>>2 string -lh \b
+# already known -lh0- -lh1- -lh2- -lh3- -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names
+>>>>>>3 regex \^lh[01] LHarc 1.x/ARX archive data
+# LHice archiver use ".ICE" as name extension instead usual one ".lzh"
+# FOOBAR archiver use ".foo" as name extension instead usual one
+# "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment
+>>>>>>>2 string -lh1 \b
+!:ext lha/lzh/ice
+>>>>>>3 regex \^lh[23d] LHa 2.x? archive data
+>>>>>>3 regex \^lh[7] LHa (2.x)/LHark archive data
+>>>>>>3 regex \^lh[456] LHa (2.x) archive data
+>>>>>>>2 string -lh5 \b
+# https://en.wikipedia.org/wiki/BIOS
+# Some mainboard BIOS like Award use LHa compression. So archives with unusal extension are found like
+# bios.rom , kd7_v14.bin, 1010.004, ...
+!:ext lha/lzh/rom/bin
+# missing -lh?- variants (Joe Jared)
+>>>>>>3 regex \^lh[89a-ce] LHa (Joe Jared) archive
+# UNLHA32 2.67a
+>>>>>>2 string -lhx LHa (UNLHA32) archive
+# lha archives with standard file name extensions ".lha" ".lzh"
+>>>>>>3 regex !\^(lh1|lh5) \b
+!:ext lha/lzh
+# this should not happen if all -lh variants are described
+>>>>>>2 default x LHa (unknown) archive
+#!:ext lha
+# PMarc
+>>>>>3 regex \^pm[012] PMarc archive data
+!:ext pma
+# append method id without leading and trailing minus character
+>>>>>3 string x [%3.3s]
+>>>>>>0 use lharc-header
+#
+# check and display information of lharc header
+0 name lharc-header
+# header size 0x4 , 0x1b-0x61
+>0 ubyte x
+# compressed data size != compressed file size
+#>7 ulelong x \b, data size %d
+# attribute: 0x2~?? 0x10~symlink|target 0x20~normal
+#>19 ubyte x \b, 19_0x%x
+# level identifier 0 1 2 3
+#>20 ubyte x \b, level %d
+# time stamp
+#>15 ubelong x DATE 0x%8.8x
+# OS ID for level 1
+>20 ubyte 1
+# 0x20 types find for *.rom files
+>>(21.b+24) ubyte <0x21 \b, 0x%x OS
+# ascii type like M for MSDOS
+>>(21.b+24) ubyte >0x20 \b, '%c' OS
+# OS ID for level 2
+>20 ubyte 2
+#>>23 ubyte x \b, OS ID 0x%x
+>>23 ubyte <0x21 \b, 0x%x OS
+>>23 ubyte >0x20 \b, '%c' OS
+# filename only for level 0 and 1
+>20 ubyte <2
+# length of filename
+>>21 ubyte >0 \b, with
+# filename
+>>>21 pstring x "%s"
+#
+#2 string -lh0- LHarc 1.x/ARX archive data [lh0]
+#!:mime application/x-lharc
+2 string -lh0-
+>0 use lharc-file
+#2 string -lh1- LHarc 1.x/ARX archive data [lh1]
+#!:mime application/x-lharc
+2 string -lh1-
+>0 use lharc-file
+# NEW -lz2- ... -lz8-
+2 string -lz2-
+>0 use lharc-file
+2 string -lz3-
+>0 use lharc-file
+2 string -lz4-
+>0 use lharc-file
+2 string -lz5-
+>0 use lharc-file
+2 string -lz7-
+>0 use lharc-file
+2 string -lz8-
+>0 use lharc-file
# [never seen any but the last; -lh4- reported in comp.compression:]
-2 string -lzs- LHa/LZS archive data [lzs]
-!:mime application/x-lha
-2 string -lh\40- LHa 2.x? archive data [lh ]
-!:mime application/x-lha
-2 string -lhd- LHa 2.x? archive data [lhd]
-!:mime application/x-lha
-2 string -lh2- LHa 2.x? archive data [lh2]
-!:mime application/x-lha
-2 string -lh3- LHa 2.x? archive data [lh3]
-!:mime application/x-lha
-2 string -lh4- LHa (2.x) archive data [lh4]
-!:mime application/x-lha
-2 string -lh5- LHa (2.x) archive data [lh5]
-!:mime application/x-lha
-2 string -lh6- LHa (2.x) archive data [lh6]
-!:mime application/x-lha
-2 string -lh7- LHa (2.x)/LHark archive data [lh7]
-!:mime application/x-lha
->20 byte x - header level %d
+#2 string -lzs- LHa/LZS archive data [lzs]
+2 string -lzs-
+>0 use lharc-file
+# According to wikipedia and others such a version does not exist
+#2 string -lh\40- LHa 2.x? archive data [lh ]
+#2 string -lhd- LHa 2.x? archive data [lhd]
+2 string -lhd-
+>0 use lharc-file
+#2 string -lh2- LHa 2.x? archive data [lh2]
+2 string -lh2-
+>0 use lharc-file
+#2 string -lh3- LHa 2.x? archive data [lh3]
+2 string -lh3-
+>0 use lharc-file
+#2 string -lh4- LHa (2.x) archive data [lh4]
+2 string -lh4-
+>0 use lharc-file
+#2 string -lh5- LHa (2.x) archive data [lh5]
+2 string -lh5-
+>0 use lharc-file
+#2 string -lh6- LHa (2.x) archive data [lh6]
+2 string -lh6-
+>0 use lharc-file
+#2 string -lh7- LHa (2.x)/LHark archive data [lh7]
+2 string -lh7-
+# !:mime application/x-lha
+# >20 byte x - header level %d
+>0 use lharc-file
+# NEW -lh8- ... -lhe- , -lhx-
+2 string -lh8-
+>0 use lharc-file
+2 string -lh9-
+>0 use lharc-file
+2 string -lha-
+>0 use lharc-file
+2 string -lhb-
+>0 use lharc-file
+2 string -lhc-
+>0 use lharc-file
+2 string -lhe-
+>0 use lharc-file
+2 string -lhx-
+>0 use lharc-file
# taken from idarc [JW]
2 string -lZ PUT archive data
-2 string -lz LZS archive data
+# already done by LHarc magics
+# this should never happen if all sub types of LZS archive are identified
+#2 string -lz LZS archive data
2 string -sw1- Swag archive data
-# RAR archiver (Greg Roelofs, newt@uchicago.edu)
-0 string Rar! RAR archive data,
+0 name rar-file-header
+>24 byte 15 \b, v1.5
+>24 byte 20 \b, v2.0
+>24 byte 29 \b, v4
+>15 byte 0 \b, os: MS-DOS
+>15 byte 1 \b, os: OS/2
+>15 byte 2 \b, os: Win32
+>15 byte 3 \b, os: Unix
+>15 byte 4 \b, os: Mac OS
+>15 byte 5 \b, os: BeOS
+
+0 name rar-archive-header
+>3 leshort&0x1ff >0 \b, flags:
+>>3 leshort &0x01 ArchiveVolume
+>>3 leshort &0x02 Commented
+>>3 leshort &0x04 Locked
+>>3 leshort &0x10 NewVolumeNaming
+>>3 leshort &0x08 Solid
+>>3 leshort &0x20 Authenticated
+>>3 leshort &0x40 RecoveryRecordPresent
+>>3 leshort &0x80 EncryptedBlockHeader
+>>3 leshort &0x100 FirstVolume
+
+# RAR (Roshal Archive) archive
+0 string Rar!\x1a\7\0 RAR archive data
!:mime application/x-rar
->44 byte x v%0x,
->10 byte >0 flags:
->>10 byte &0x01 Archive volume,
->>10 byte &0x02 Commented,
->>10 byte &0x04 Locked,
->>10 byte &0x08 Solid,
->>10 byte &0x20 Authenticated,
->35 byte 0 os: MS-DOS
->35 byte 1 os: OS/2
->35 byte 2 os: Win32
->35 byte 3 os: Unix
-# some old version? idarc says:
-0 string RE\x7e\x5e RAR archive data
+!:ext rar/cbr
+# file header
+>(0xc.l+9) byte 0x74
+>>(0xc.l+7) use rar-file-header
+# subblock seems to share information with file header
+>(0xc.l+9) byte 0x7a
+>>(0xc.l+7) use rar-file-header
+>9 byte 0x73
+>>7 use rar-archive-header
+
+0 string Rar!\x1a\7\1\0 RAR archive data, v5
+!:mime application/x-rar
+!:ext rar
+
+# Very old RAR archive
+# http://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf
+0 string RE\x7e\x5e RAR archive data (<v1.5)
+!:mime application/x-rar
+!:ext rar/cbr
# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
0 string SQSH squished archive data (Acorn RISCOS)
@@ -586,9 +769,12 @@
# PKZIP multi-volume archive
0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract
!:mime application/zip
+!:ext zip/cbz
# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
0 string PK\005\006 Zip archive data (empty)
+!:mime application/zip
+!:ext zip/cbz
0 string PK\003\004
# Specialised zip formats which start with a member named 'mimetype'
@@ -705,6 +891,14 @@
>(26.s+30) leshort 0xcafe Java archive data (JAR)
!:mime application/java-archive
+# iOS App
+>(26.s+30) leshort !0xcafe
+>>26 string !\x8\0\0\0mimetype
+>>>30 string Payload/
+>>>>38 search/64 .app/ iOS App
+!:mime application/x-ios-app
+
+
# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# Next line excludes specialized formats:
>(26.s+30) leshort !0xcafe
@@ -714,7 +908,17 @@
>>>4 byte 0x0a \b, at least v1.0 to extract
>>>4 byte 0x0b \b, at least v1.1 to extract
>>>4 byte 0x14 \b, at least v2.0 to extract
->>>4 byte 0x2d \b, at least v3.0 to extract
+>>>4 byte 0x15 \b, at least v2.1 to extract
+>>>4 byte 0x19 \b, at least v2.5 to extract
+>>>4 byte 0x1b \b, at least v2.7 to extract
+>>>4 byte 0x2d \b, at least v4.5 to extract
+>>>4 byte 0x2e \b, at least v4.6 to extract
+>>>4 byte 0x32 \b, at least v5.0 to extract
+>>>4 byte 0x33 \b, at least v5.1 to extract
+>>>4 byte 0x34 \b, at least v5.2 to extract
+>>>4 byte 0x3d \b, at least v6.1 to extract
+>>>4 byte 0x3e \b, at least v6.2 to extract
+>>>4 byte 0x3f \b, at least v6.3 to extract
>>>0x161 string WINZIP \b, WinZIP self-extracting
# StarView Metafile
@@ -746,12 +950,24 @@
0 string \0\ \ \ \ \ \ \ \ \ \ \ \0\0 LBR archive data
#
# PMA (CP/M derivative of LHA)
+# Update: Joerg Jenderek
+# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
#
-2 string -pm0- PMarc archive data [pm0]
-2 string -pm1- PMarc archive data [pm1]
-2 string -pm2- PMarc archive data [pm2]
+#2 string -pm0- PMarc archive data [pm0]
+2 string -pm0-
+>0 use lharc-file
+#2 string -pm1- PMarc archive data [pm1]
+2 string -pm1-
+>0 use lharc-file
+#2 string -pm2- PMarc archive data [pm2]
+2 string -pm2-
+>0 use lharc-file
2 string -pms- PMarc SFX archive (CP/M, DOS)
+#!:mime application/x-foobar-exec
+!:ext com
5 string -pc1- PopCom compressed executable (CP/M)
+#!:mime application/x-
+#!:ext com
# From Rafael Laboissiere <rafael@laboissiere.net>
# The Project Revision Control System (see
@@ -784,6 +1000,9 @@
# Felix von Leitner <felix-file@fefe.de>
0 string d8:announce BitTorrent file
!:mime application/x-bittorrent
+# Durval Menezes, <jmgthbfile at durval dot com>
+0 string d13:announce-list BitTorrent file
+!:mime application/x-bittorrent
# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
0 beshort 0x0e0f Atari MSA archive data
@@ -871,19 +1090,16 @@
# From "Nelson A. de Oliveira" <naoliv@gmail.com>
0 string MPQ\032 MoPaQ (MPQ) archive
-# From: Dirk Jagdmann <doj@cubic.org>
-# xar archive format: http://code.google.com/p/xar/
-0 string xar! xar archive
->6 beshort x - version %d
-
# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
# .kgb
0 string KGB_arch KGB Archiver file
>10 string x with compression level %.1s
# xar (eXtensible ARchiver) archive
+# xar archive format: http://code.google.com/p/xar/
# From: "David Remahl" <dremahl@apple.com>
0 string xar! xar archive
+!:mime application/x-xar
#>4 beshort x header size %d
>6 beshort x version %d,
#>8 quad x compressed TOC: %d,
@@ -911,6 +1127,9 @@
# ZPAQ: http://mattmahoney.net/dc/zpaq.html
0 string zPQ ZPAQ stream
>3 byte x \b, level %d
+# From: Barry Carter <carter.barry@gmail.com>
+# http://encode.ru/threads/456-zpaq-updates/page32
+0 string 7kSt ZPAQ file
# BBeB ebook, unencrypted (LRF format)
# URL: http://www.sven.de/librie/Librie/LrfFormat
@@ -936,12 +1155,12 @@
>3 ubyte 0 \b, no compression
>3 ubyte 2 \b, fast compression (Z1)
>3 ubyte 3 \b, medium compression (Z2)
->3 ubyte >3
+>3 ubyte >3
>>3 ubyte <11 \b, compression (Z%d-1)
->2 ubyte&0x08 0x00
+>2 ubyte&0x08 0x00
# ~ 30 byte password field only for *.gho
>>12 ubequad !0 \b, password protected
->>44 ubyte !1
+>>44 ubyte !1
# 1~Image All, sector-by-sector only for *.gho
>>>10 ubyte 1 \b, sector copy
# 1~Image Boot track only for *.gho
@@ -951,6 +1170,19 @@
# optional image description only *.gho
>>0xff string >\0 "%-.254s"
# look for DOS sector end sequence
->0xE08 search/7776 \x55\xAA
->>&-512 indirect x \b; contains
-
+>0xE08 search/7776 \x55\xAA
+>>&-512 indirect x \b; contains
+
+# Google Chrome extensions
+# https://developer.chrome.com/extensions/crx
+# https://developer.chrome.com/extensions/hosting
+0 string Cr24 Google Chrome extension
+!:mime application/x-chrome-extension
+>4 ulong x \b, version %u
+
+# SeqBox - Sequenced container
+# ext: sbx, seqbox
+# Marco Pontello marcopon@gmail.com
+# reference: https://github.com/MarcoPon/SeqBox
+0 string SBx SeqBox,
+>3 byte x version %d