diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 90fc64cd..0ed3ebc7 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -13,6 +13,15 @@ RecursiveExtractor is a cross-platform .NET library and CLI tool for parsing arc ## Building and Testing +### Git Clone Depth + +⚠️ **Important**: This repository uses [Nerdbank.GitVersioning](https://github.com/dotnet/Nerdbank.GitVersioning) (NBGV) to calculate version numbers from git history. Shallow clones will cause the build to fail with a `GitException: Shallow clone lacks the objects required to calculate version height` error. If you encounter this, deepen the clone: +```bash +git fetch --unshallow +# or if that fails: +git fetch --depth=100 +``` + ### Build Commands ```bash # Build the entire solution diff --git a/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs b/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs index 7535716a..e36aead8 100644 --- a/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs +++ b/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs @@ -43,6 +43,9 @@ public static TheoryData ArchiveData { "TestDataArchivesNested.Zip", 54 }, { "UdfTest.iso", 3 }, { "UdfTestWithMultiSystem.iso", 3 }, + { "TestData.arj", 1 }, + { "TestData.arc", 1 }, + { "TestData.ace", 1 }, // { "HfsSampleUDCO.dmg", 2 } }; } @@ -75,6 +78,9 @@ public static TheoryData NoRecursionData { "EmptyFile.txt", 1 }, { "TestDataArchivesNested.Zip", 14 }, { "UdfTestWithMultiSystem.iso", 3 }, + { "TestData.arj", 1 }, + { "TestData.arc", 1 }, + { "TestData.ace", 1 }, // { "HfsSampleUDCO.dmg", 2 } }; } @@ -193,8 +199,7 @@ public void ExtractArchiveParallel(string fileName, int expectedNumFiles) var extractor = new Extractor(); var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", fileName); var results = extractor.Extract(path, GetExtractorOptions(true)).ToList(); - var names = 
results.Select(x => x.FullPath); - var stringOfNames = string.Join("\n", names); + Assert.DoesNotContain(results, r => r.EntryStatus == FileEntryStatus.FailedArchive); Assert.Equal(expectedNumFiles, results.Count); } @@ -223,17 +228,22 @@ public async Task ExtractArchiveAsync(string fileName, int expectedNumFiles) [MemberData(nameof(ArchiveData))] public async Task ExtractArchiveFromStreamAsync(string fileName, int expectedNumFiles) { - var extractor = new Extractor(); + var extractor = new Extractor(); var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", fileName); using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read); var results = extractor.ExtractAsync(path, stream, new ExtractorOptions()); var numFiles = 0; + var numFailed = 0; await foreach (var result in results) { numFiles++; + if (result.EntryStatus == FileEntryStatus.FailedArchive) + { + numFailed++; + } } + Assert.Equal(0, numFailed); Assert.Equal(expectedNumFiles, numFiles); - stream.Close(); } [Theory] @@ -243,8 +253,9 @@ public void ExtractArchiveFromStream(string fileName, int expectedNumFiles) var extractor = new Extractor(); var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", fileName); using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read); - var results = extractor.Extract(path, stream, GetExtractorOptions()); - Assert.Equal(expectedNumFiles, results.Count()); + var resultsList = extractor.Extract(path, stream, GetExtractorOptions()).ToList(); + Assert.DoesNotContain(resultsList, r => r.EntryStatus == FileEntryStatus.FailedArchive); + Assert.Equal(expectedNumFiles, resultsList.Count); stream.Close(); } diff --git a/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs b/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs index 20e2d959..555975ab 100644 --- a/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs +++ 
b/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs @@ -24,6 +24,9 @@ public class MiniMagicTests [InlineData("Empty.vmdk", ArchiveFileType.VMDK)] [InlineData("HfsSampleUDCO.dmg", ArchiveFileType.DMG)] [InlineData("EmptyFile.txt", ArchiveFileType.UNKNOWN)] + [InlineData("TestData.arj", ArchiveFileType.ARJ)] + [InlineData("TestData.arc", ArchiveFileType.ARC)] + [InlineData("TestData.ace", ArchiveFileType.ACE)] public void TestMiniMagic(string fileName, ArchiveFileType expectedArchiveFileType) { var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", fileName); diff --git a/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj b/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj index 5b57be59..30848fc1 100644 --- a/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj +++ b/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj @@ -301,6 +301,15 @@ PreserveNewest + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + PreserveNewest diff --git a/RecursiveExtractor.Tests/TestData/TestDataArchives/TestData.ace b/RecursiveExtractor.Tests/TestData/TestDataArchives/TestData.ace new file mode 100644 index 00000000..36861e7e Binary files /dev/null and b/RecursiveExtractor.Tests/TestData/TestDataArchives/TestData.ace differ diff --git a/RecursiveExtractor.Tests/TestData/TestDataArchives/TestData.arc b/RecursiveExtractor.Tests/TestData/TestDataArchives/TestData.arc new file mode 100644 index 00000000..9c9e42a0 Binary files /dev/null and b/RecursiveExtractor.Tests/TestData/TestDataArchives/TestData.arc differ diff --git a/RecursiveExtractor.Tests/TestData/TestDataArchives/TestData.arj b/RecursiveExtractor.Tests/TestData/TestDataArchives/TestData.arj new file mode 100644 index 00000000..a0b16862 Binary files /dev/null and b/RecursiveExtractor.Tests/TestData/TestDataArchives/TestData.arj differ diff --git a/RecursiveExtractor/Extractor.cs b/RecursiveExtractor/Extractor.cs index 8b14fe2c..85d53e8a 100644 --- 
a/RecursiveExtractor/Extractor.cs +++ b/RecursiveExtractor/Extractor.cs @@ -85,6 +85,9 @@ public void SetDefaultExtractors() SetExtractor(ArchiveFileType.VMDK, new VmdkExtractor(this)); SetExtractor(ArchiveFileType.XZ, new XzExtractor(this)); SetExtractor(ArchiveFileType.ZIP, new ZipExtractor(this)); + SetExtractor(ArchiveFileType.ARJ, new ArjExtractor(this)); + SetExtractor(ArchiveFileType.ARC, new ArcExtractor(this)); + SetExtractor(ArchiveFileType.ACE, new AceExtractor(this)); if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { SetExtractor(ArchiveFileType.WIM, new WimExtractor(this)); diff --git a/RecursiveExtractor/Extractors/AceExtractor.cs b/RecursiveExtractor/Extractors/AceExtractor.cs new file mode 100644 index 00000000..edc7930a --- /dev/null +++ b/RecursiveExtractor/Extractors/AceExtractor.cs @@ -0,0 +1,173 @@ +using SharpCompress.Readers; +using SharpCompress.Readers.Ace; +using System; +using System.Collections.Generic; +using System.IO; + +namespace Microsoft.CST.RecursiveExtractor.Extractors +{ + /// + /// The ACE Archive extractor implementation + /// + public class AceExtractor : AsyncExtractorInterface + { + /// + /// The constructor takes the Extractor context for recursion. + /// + /// The Extractor context. + public AceExtractor(Extractor context) + { + Context = context; + } + private readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger(); + + internal Extractor Context { get; } + + /// + /// Extracts an ACE archive + /// + /// + public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) + { + AceReader? 
aceReader = null; + try + { + aceReader = AceReader.Open(fileEntry.Content, new ReaderOptions() + { + LeaveStreamOpen = true + }); + } + catch (Exception e) + { + Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ACE, fileEntry.FullPath, string.Empty, e.GetType()); + } + + if (aceReader != null) + { + using (aceReader) + { + while (aceReader.MoveToNextEntry()) + { + var entry = aceReader.Entry; + if (entry.IsDirectory) + { + continue; + } + + var name = entry.Key?.Replace('/', Path.DirectorySeparatorChar); + if (string.IsNullOrEmpty(name)) + { + Logger.Debug(Extractor.ENTRY_MISSING_NAME_ERROR_MESSAGE_STRING, ArchiveFileType.ACE, fileEntry.FullPath); + continue; + } + + governor.CheckResourceGovernor(entry.Size); + using (var entryStream = aceReader.OpenEntryStream()) + { + var newFileEntry = await FileEntry.FromStreamAsync(name, entryStream, fileEntry, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false); + if (newFileEntry != null) + { + if (options.Recurse || topLevel) + { + await foreach (var innerEntry in Context.ExtractAsync(newFileEntry, options, governor, false)) + { + yield return innerEntry; + } + } + else + { + yield return newFileEntry; + } + } + } + } + } + } + else + { + if (options.ExtractSelfOnFail) + { + fileEntry.EntryStatus = FileEntryStatus.FailedArchive; + yield return fileEntry; + } + } + } + + /// + /// Extracts an ACE archive + /// + /// + public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) + { + AceReader? 
aceReader = null; + try + { + aceReader = AceReader.Open(fileEntry.Content, new ReaderOptions() + { + LeaveStreamOpen = true + }); + } + catch (Exception e) + { + Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ACE, fileEntry.FullPath, string.Empty, e.GetType()); + } + + if (aceReader != null) + { + using (aceReader) + { + while (aceReader.MoveToNextEntry()) + { + var entry = aceReader.Entry; + if (entry.IsDirectory) + { + continue; + } + + FileEntry? newFileEntry = null; + try + { + governor.CheckResourceGovernor(entry.Size); + using (var stream = aceReader.OpenEntryStream()) + { + var name = entry.Key?.Replace('/', Path.DirectorySeparatorChar); + if (string.IsNullOrEmpty(name)) + { + Logger.Debug(Extractor.ENTRY_MISSING_NAME_ERROR_MESSAGE_STRING, ArchiveFileType.ACE, fileEntry.FullPath); + continue; + } + newFileEntry = new FileEntry(name, stream, fileEntry, false, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff); + } + } + catch (Exception e) + { + Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ACE, fileEntry.FullPath, entry.Key, e.GetType()); + } + if (newFileEntry != null) + { + if (options.Recurse || topLevel) + { + foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false)) + { + yield return innerEntry; + } + } + else + { + yield return newFileEntry; + } + } + } + } + } + else + { + if (options.ExtractSelfOnFail) + { + fileEntry.EntryStatus = FileEntryStatus.FailedArchive; + yield return fileEntry; + } + } + } + } +} diff --git a/RecursiveExtractor/Extractors/ArcExtractor.cs b/RecursiveExtractor/Extractors/ArcExtractor.cs new file mode 100644 index 00000000..00a5ee28 --- /dev/null +++ b/RecursiveExtractor/Extractors/ArcExtractor.cs @@ -0,0 +1,179 @@ +using SharpCompress.Readers; +using SharpCompress.Readers.Arc; +using System; +using System.Collections.Generic; +using System.IO; + +namespace 
Microsoft.CST.RecursiveExtractor.Extractors +{ + /// + /// The ARC Archive extractor implementation + /// + public class ArcExtractor : AsyncExtractorInterface + { + /// + /// The constructor takes the Extractor context for recursion. + /// + /// The Extractor context. + public ArcExtractor(Extractor context) + { + Context = context; + } + private readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger(); + + internal Extractor Context { get; } + + /// + /// Extracts an ARC archive + /// + /// + public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) + { + ArcReader? arcReader = null; + try + { + arcReader = ArcReader.Open(fileEntry.Content, new ReaderOptions() + { + LeaveStreamOpen = true + }); + } + catch (Exception e) + { + Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARC, fileEntry.FullPath, string.Empty, e.GetType()); + } + + if (arcReader != null) + { + using (arcReader) + { + while (arcReader.MoveToNextEntry()) + { + var entry = arcReader.Entry; + if (entry.IsDirectory) + { + continue; + } + + var name = entry.Key?.Replace('/', Path.DirectorySeparatorChar); + if (string.IsNullOrEmpty(name)) + { + Logger.Debug(Extractor.ENTRY_MISSING_NAME_ERROR_MESSAGE_STRING, ArchiveFileType.ARC, fileEntry.FullPath); + continue; + } + + using (var entryStream = arcReader.OpenEntryStream()) + { + var newFileEntry = await FileEntry.FromStreamAsync(name, entryStream, fileEntry, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false); + if (newFileEntry != null) + { + // SharpCompress ARC does not expose entry sizes, so we check the resource governor + // after extraction using the actual decompressed content length. 
+ governor.CheckResourceGovernor(newFileEntry.Content.Length); + + if (options.Recurse || topLevel) + { + await foreach (var innerEntry in Context.ExtractAsync(newFileEntry, options, governor, false)) + { + yield return innerEntry; + } + } + else + { + yield return newFileEntry; + } + } + } + } + } + } + else + { + if (options.ExtractSelfOnFail) + { + fileEntry.EntryStatus = FileEntryStatus.FailedArchive; + yield return fileEntry; + } + } + } + + /// + /// Extracts an ARC archive + /// + /// + public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) + { + ArcReader? arcReader = null; + try + { + arcReader = ArcReader.Open(fileEntry.Content, new ReaderOptions() + { + LeaveStreamOpen = true + }); + } + catch (Exception e) + { + Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARC, fileEntry.FullPath, string.Empty, e.GetType()); + } + + if (arcReader != null) + { + using (arcReader) + { + while (arcReader.MoveToNextEntry()) + { + var entry = arcReader.Entry; + if (entry.IsDirectory) + { + continue; + } + + FileEntry? newFileEntry = null; + try + { + using (var stream = arcReader.OpenEntryStream()) + { + var name = entry.Key?.Replace('/', Path.DirectorySeparatorChar); + if (string.IsNullOrEmpty(name)) + { + Logger.Debug(Extractor.ENTRY_MISSING_NAME_ERROR_MESSAGE_STRING, ArchiveFileType.ARC, fileEntry.FullPath); + continue; + } + newFileEntry = new FileEntry(name, stream, fileEntry, false, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff); + } + } + catch (Exception e) + { + Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARC, fileEntry.FullPath, entry.Key, e.GetType()); + } + if (newFileEntry != null) + { + // SharpCompress ARC does not expose entry sizes, so we check the resource governor + // after extraction using the actual decompressed content length. 
+ governor.CheckResourceGovernor(newFileEntry.Content.Length); + + if (options.Recurse || topLevel) + { + foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false)) + { + yield return innerEntry; + } + } + else + { + yield return newFileEntry; + } + } + } + } + } + else + { + if (options.ExtractSelfOnFail) + { + fileEntry.EntryStatus = FileEntryStatus.FailedArchive; + yield return fileEntry; + } + } + } + } +} diff --git a/RecursiveExtractor/Extractors/ArjExtractor.cs b/RecursiveExtractor/Extractors/ArjExtractor.cs new file mode 100644 index 00000000..6cf6881e --- /dev/null +++ b/RecursiveExtractor/Extractors/ArjExtractor.cs @@ -0,0 +1,173 @@ +using SharpCompress.Readers; +using SharpCompress.Readers.Arj; +using System; +using System.Collections.Generic; +using System.IO; + +namespace Microsoft.CST.RecursiveExtractor.Extractors +{ + /// + /// The ARJ Archive extractor implementation + /// + public class ArjExtractor : AsyncExtractorInterface + { + /// + /// The constructor takes the Extractor context for recursion. + /// + /// The Extractor context. + public ArjExtractor(Extractor context) + { + Context = context; + } + private readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger(); + + internal Extractor Context { get; } + + /// + /// Extracts an ARJ archive + /// + /// + public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) + { + ArjReader? 
arjReader = null; + try + { + arjReader = ArjReader.Open(fileEntry.Content, new ReaderOptions() + { + LeaveStreamOpen = true + }); + } + catch (Exception e) + { + Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARJ, fileEntry.FullPath, string.Empty, e.GetType()); + } + + if (arjReader != null) + { + using (arjReader) + { + while (arjReader.MoveToNextEntry()) + { + var entry = arjReader.Entry; + if (entry.IsDirectory) + { + continue; + } + + governor.CheckResourceGovernor(entry.Size); + var name = entry.Key?.Replace('/', Path.DirectorySeparatorChar); + if (string.IsNullOrEmpty(name)) + { + Logger.Debug(Extractor.ENTRY_MISSING_NAME_ERROR_MESSAGE_STRING, ArchiveFileType.ARJ, fileEntry.FullPath); + continue; + } + + using (var entryStream = arjReader.OpenEntryStream()) + { + var newFileEntry = await FileEntry.FromStreamAsync(name, entryStream, fileEntry, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false); + if (newFileEntry != null) + { + if (options.Recurse || topLevel) + { + await foreach (var innerEntry in Context.ExtractAsync(newFileEntry, options, governor, false)) + { + yield return innerEntry; + } + } + else + { + yield return newFileEntry; + } + } + } + } + } + } + else + { + if (options.ExtractSelfOnFail) + { + fileEntry.EntryStatus = FileEntryStatus.FailedArchive; + yield return fileEntry; + } + } + } + + /// + /// Extracts an ARJ archive + /// + /// + public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) + { + ArjReader? 
arjReader = null; + try + { + arjReader = ArjReader.Open(fileEntry.Content, new ReaderOptions() + { + LeaveStreamOpen = true + }); + } + catch (Exception e) + { + Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARJ, fileEntry.FullPath, string.Empty, e.GetType()); + } + + if (arjReader != null) + { + using (arjReader) + { + while (arjReader.MoveToNextEntry()) + { + var entry = arjReader.Entry; + if (entry.IsDirectory) + { + continue; + } + + governor.CheckResourceGovernor(entry.Size); + FileEntry? newFileEntry = null; + try + { + using (var stream = arjReader.OpenEntryStream()) + { + var name = entry.Key?.Replace('/', Path.DirectorySeparatorChar); + if (string.IsNullOrEmpty(name)) + { + Logger.Debug(Extractor.ENTRY_MISSING_NAME_ERROR_MESSAGE_STRING, ArchiveFileType.ARJ, fileEntry.FullPath); + continue; + } + newFileEntry = new FileEntry(name, stream, fileEntry, false, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff); + } + } + catch (Exception e) + { + Logger.Debug(Extractor.FAILED_PARSING_ERROR_MESSAGE_STRING, ArchiveFileType.ARJ, fileEntry.FullPath, entry.Key, e.GetType()); + } + if (newFileEntry != null) + { + if (options.Recurse || topLevel) + { + foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false)) + { + yield return innerEntry; + } + } + else + { + yield return newFileEntry; + } + } + } + } + } + else + { + if (options.ExtractSelfOnFail) + { + fileEntry.EntryStatus = FileEntryStatus.FailedArchive; + yield return fileEntry; + } + } + } + } +} diff --git a/RecursiveExtractor/Extractors/SevenZipExtractor.cs b/RecursiveExtractor/Extractors/SevenZipExtractor.cs index 2f1e3a53..887f55dd 100644 --- a/RecursiveExtractor/Extractors/SevenZipExtractor.cs +++ b/RecursiveExtractor/Extractors/SevenZipExtractor.cs @@ -105,6 +105,7 @@ public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, Extra { try { + fileEntry.Content.Position = 0; 
sevenZipArchive = SevenZipArchive.Open(fileEntry.Content, new SharpCompress.Readers.ReaderOptions() { Password = password }); // When filenames are encrypted we can't access the size of individual files // But if we can accesss the total uncompressed size we have the right password diff --git a/RecursiveExtractor/MiniMagic.cs b/RecursiveExtractor/MiniMagic.cs index 64c914d7..f5ea2362 100644 --- a/RecursiveExtractor/MiniMagic.cs +++ b/RecursiveExtractor/MiniMagic.cs @@ -85,6 +85,18 @@ public enum ArchiveFileType /// DMG, /// + /// An ARJ compressed archive. + /// + ARJ, + /// + /// An ARC compressed archive. + /// + ARC, + /// + /// An ACE compressed archive. + /// + ACE, + /// /// Unused. /// INVALID @@ -120,7 +132,7 @@ public static ArchiveFileType DetectFileType(Stream fileStream) return ArchiveFileType.UNKNOWN; } var initialPosition = fileStream.Position; - var buffer = new byte[9]; + var buffer = new byte[14]; // DMG format uses the magic value 'koly' at the start of the 512 byte footer at the end of the file // Due to compression used, needs to be first or can be misidentified as other formats // https://newosxbook.com/DMG.html @@ -137,42 +149,73 @@ public static ArchiveFileType DetectFileType(Stream fileStream) } } - if (fileStream.Length >= 9) + var bytesRead = 0; + if (fileStream.Length >= 2) { + var toRead = (int)Math.Min(fileStream.Length, buffer.Length); fileStream.Position = 0; - fileStream.ReadExactly(buffer, 0, 9); + fileStream.ReadExactly(buffer, 0, toRead); fileStream.Position = initialPosition; + bytesRead = toRead; - if (buffer[0] == 0x50 && buffer[1] == 0x4B && buffer[2] == 0x03 && buffer[3] == 0x04) + if (buffer[0] == 0x1F && buffer[1] == 0x8B) { - return ArchiveFileType.ZIP; + return ArchiveFileType.GZIP; + } + // ARJ archive header starts with 0x60, 0xEA + if (buffer[0] == 0x60 && buffer[1] == 0xEA) + { + return ArchiveFileType.ARJ; } + // ARC archive: marker byte 0x1A, then compression method (valid: 0x01-0x09 or 0x7F) + if (buffer[0] == 0x1A 
&& ((buffer[1] >= 0x01 && buffer[1] <= 0x09) || buffer[1] == 0x7F)) + { + return ArchiveFileType.ARC; + } + } - if (buffer[0] == 0x1F && buffer[1] == 0x8B) + if (bytesRead >= 3) + { + if (buffer[0] == 0x42 && buffer[1] == 0x5A && buffer[2] == 0x68) { - return ArchiveFileType.GZIP; + return ArchiveFileType.BZIP2; + } + } + + if (bytesRead >= 4) + { + if (buffer[0] == 0x50 && buffer[1] == 0x4B && buffer[2] == 0x03 && buffer[3] == 0x04) + { + return ArchiveFileType.ZIP; } + } + if (bytesRead >= 6) + { if (buffer[0] == 0xFD && buffer[1] == 0x37 && buffer[2] == 0x7A && buffer[3] == 0x58 && buffer[4] == 0x5A && buffer[5] == 0x00) { return ArchiveFileType.XZ; } - if (buffer[0] == 0x42 && buffer[1] == 0x5A && buffer[2] == 0x68) + if (buffer[0] == 0x37 && buffer[1] == 0x7A && buffer[2] == 0xBC && buffer[3] == 0xAF && buffer[4] == 0x27 && buffer[5] == 0x1C) { - return ArchiveFileType.BZIP2; + return ArchiveFileType.P7ZIP; } + } + + if (bytesRead >= 7) + { if (buffer[0] == 0x52 && buffer[1] == 0x61 && buffer[2] == 0x72 && buffer[3] == 0x21 && buffer[4] == 0x1A && buffer[5] == 0x07 && buffer[6] == 0x00) { return ArchiveFileType.RAR; } + } + + if (bytesRead >= 8) + { if (buffer[0] == 0x52 && buffer[1] == 0x61 && buffer[2] == 0x72 && buffer[3] == 0x21 && buffer[4] == 0x1A && buffer[5] == 0x07 && buffer[6] == 0x01 && buffer[7] == 0x00) { return ArchiveFileType.RAR5; } - if (buffer[0] == 0x37 && buffer[1] == 0x7A && buffer[2] == 0xBC && buffer[3] == 0xAF && buffer[4] == 0x27 && buffer[5] == 0x1C) - { - return ArchiveFileType.P7ZIP; - } if (Encoding.ASCII.GetString(buffer[0..8]) == "MSWIM\0\0\0" || Encoding.ASCII.GetString(buffer[0..8]) == "WLPWM\0\0\0") { return ArchiveFileType.WIM; @@ -229,6 +272,12 @@ public static ArchiveFileType DetectFileType(Stream fileStream) } } + // ACE archive: signature "**ACE**" at offset 7 + if (bytesRead >= 14 && buffer[7] == 0x2A && buffer[8] == 0x2A && buffer[9] == 0x41 && buffer[10] == 0x43 && buffer[11] == 0x45 && buffer[12] == 0x2A && buffer[13] 
== 0x2A) + { + return ArchiveFileType.ACE; + } + if (fileStream.Length >= 262) { fileStream.Position = 257; diff --git a/RecursiveExtractor/RecursiveExtractor.csproj b/RecursiveExtractor/RecursiveExtractor.csproj index a45aeb8c..1b6bd271 100644 --- a/RecursiveExtractor/RecursiveExtractor.csproj +++ b/RecursiveExtractor/RecursiveExtractor.csproj @@ -48,7 +48,7 @@ - +