I'm trying to put together a python script that can read the raw data from the back-up image of an ext4 partition and recognize when it sees an ext4 inode structure so that particular file can be recovered.
Such a script would be a last resort for cases where all other methods fail: the superblock is corrupted and the files cannot be recovered using magic numbers, magic bytes, or known extension types.
The data that will be analyzed is created by successful execution of this command pointed at the appropriate partition:
dd if=/dev/sda of=partition.dd
The answer I am looking for is Python code that reads raw data blocks one at a time from a dd image and identifies whether each block is an ext4 inode block or not.
If I get help with this, I can use the data found in the inode to locate the extent blocks, which will allow that file to be fully recovered. I have not found anything like this out there, so I'm sure this is a badly needed tool that is currently missing.
I've been studying this:
https://www.kernel.org/doc/html/latest/filesystems/ext4/about.html
So far I have this and am updating based on an answer below:
#!/usr/bin/python
"""Scan a raw ``dd`` image for ext4 superblock and inode candidates.

The image is read in READ_BYTES-sized chunks.  Whenever a chunk contains a
known on-disk magic number, the bytes around it are re-read as a complete
record (superblock or inode) and split into raw fields using the offset
tables below.  Every field is kept as the raw little-endian bytes found on
disk; nothing is validated beyond the magic number, so false positives are
expected and must be filtered by the caller.

Runs on Python 2 and Python 3.
"""
import binascii
import sys

READ_BYTES = 512
SUPERBLOCK_SIZE = 1024

# Start offset of every superblock field, in on-disk order.
SUPERBLOCK_OFFSETS = [
    0x0, 0x4, 0x8, 0xC, 0x10, 0x14, 0x18, 0x1C, 0x20, 0x24,
    0x28, 0x2C, 0x30, 0x34, 0x36, 0x38, 0x3A, 0x3C, 0x3E, 0x40,
    0x44, 0x48, 0x4C, 0x50, 0x52, 0x54, 0x58, 0x5A, 0x5C, 0x60,
    0x64, 0x68, 0x78, 0x88, 0xC8, 0xCC, 0xCD, 0xCE, 0xD0, 0xE0,
    0xE4, 0xE8, 0xEC, 0xFC, 0xFD, 0xFE, 0x100, 0x104, 0x108, 0x10C,
    0x150, 0x154, 0x158, 0x15C, 0x15E, 0x160, 0x164, 0x166, 0x168, 0x170,
    0x174, 0x175, 0x176, 0x178, 0x180, 0x184, 0x188, 0x190, 0x194, 0x198,
    0x19C, 0x1A0, 0x1A8, 0x1C8, 0x1CC, 0x1D0, 0x1D4, 0x1D8, 0x1E0, 0x200,
    0x240, 0x244, 0x248, 0x24C, 0x254, 0x258, 0x268, 0x26C, 0x270, 0x274,
    0x275, 0x276, 0x277, 0x278, 0x279, 0x27A, 0x27C, 0x27E, 0x280, 0x3FC,
]
# The magic field is the 16th superblock field (offset 0x38).
SUPERBLOCK_MAGIC_NUMBER_OFFSET = SUPERBLOCK_OFFSETS[15]

# Start offset of every inode field, in on-disk order.
INODE_OFFSETS = [
    0x0, 0x2, 0x4, 0x8, 0xC, 0x10, 0x14, 0x18, 0x1A, 0x1C,
    0x20, 0x24, 0x28, 0x64, 0x68, 0x6C, 0x70, 0x74, 0x80, 0x82,
    0x84, 0x88, 0x8C, 0x90, 0x94, 0x98, 0x9C,
]
# The inode "magic" searched for is the extent-header magic, which sits at
# the start of the block map (13th inode field, offset 0x28).  The original
# code indexed SUPERBLOCK_OFFSETS here and got 0x24, which shifted every
# decoded inode by four bytes.
INODE_MAGIC_NUMBER_OFFSET = INODE_OFFSETS[12]
# NOTE(review): the last inode field starts at 0x9C; the "+ 32" makes the
# final slice 32 bytes long.  Kept as in the original -- confirm the intended
# record length against the ext4 inode layout.
INODE_BLOCK_SIZE = INODE_OFFSETS[-1] + 32


def _hexDump(data):
    """Return *data* (a byte string) as space-separated two-digit hex bytes."""
    digits = binascii.hexlify(data).decode('ascii')
    return " ".join(digits[i:i + 2] for i in range(0, len(digits), 2))


def _splitFields(raw, offsets, totalSize):
    """Cut *raw* into consecutive slices starting at each entry of *offsets*.

    The last slice runs from the final offset up to *totalSize*.  If *raw* is
    shorter than *totalSize* (record truncated by end of file) the trailing
    slices are simply short or empty.
    """
    bounds = list(offsets) + [totalSize]
    return [raw[start:end] for start, end in zip(bounds, bounds[1:])]


def _littleEndianInt(raw):
    """Decode a little-endian byte string as an unsigned integer (empty -> 0)."""
    return int(binascii.hexlify(raw[::-1]) or b'0', 16)


def _assignFields(record, names, values):
    """Set each attribute in *names* on *record*.

    With a non-empty *values* sequence the attributes are filled positionally
    (IndexError if *values* is too short); otherwise every attribute is None.
    """
    if values:
        for index, name in enumerate(names):
            setattr(record, name, values[index])
    else:
        for name in names:
            setattr(record, name, None)


class Partition(object):
    """A raw disk/partition image that can be scanned for ext4 structures.

    Attributes:
        path: filesystem path of the image file.
        superblockMagicNumber: on-disk bytes of the superblock magic.
        inodeMagicNumber: on-disk bytes of the extent-header magic used to
            recognise inode candidates.
        superblocks: Superblock objects collected by findSuperblock().
        inodes: Inode objects collected by findInodes().
    """

    def __init__(self, path):
        self.path = path
        self.superblockMagicNumber = b'\x53\xEF'
        self.superblocks = []
        self.inodes = []
        self.inodeMagicNumber = b"\x0A\xF3"

    def findSuperblock(self):
        """Scan the image and append every superblock candidate to self.superblocks."""
        self._scan(self.superblockMagicNumber, SUPERBLOCK_MAGIC_NUMBER_OFFSET,
                   SUPERBLOCK_SIZE, SUPERBLOCK_OFFSETS, Superblock,
                   self.superblocks, "superblock")

    def findInodes(self):
        """Scan the image and append every inode candidate to self.inodes."""
        self._scan(self.inodeMagicNumber, INODE_MAGIC_NUMBER_OFFSET,
                   INODE_BLOCK_SIZE, INODE_OFFSETS, Inode,
                   self.inodes, "inode")

    def _scan(self, magic, magicOffset, recordSize, offsets, recordClass,
              found, label):
        """Search the image for *magic* and decode a record around each hit.

        Args:
            magic: byte string to search for.
            magicOffset: offset of the magic inside the record, used to step
                back from a hit to the start of the record.
            recordSize: number of bytes to read for one record.
            offsets: field start offsets passed to _splitFields().
            recordClass: Superblock or Inode; called with the list of raw
                field slices and expected to expose a FIELDS tuple.
            found: list that receives the decoded records.
            label: word used in the progress output ("superblock"/"inode").
        """
        # Bytes carried over from the previous chunk so that a magic number
        # straddling a chunk boundary is still seen.  It is one byte shorter
        # than the magic, so a match can never lie entirely inside it and no
        # hit is reported twice.
        overlap = max(len(magic) - 1, 0)
        carry = b''
        with open(self.path, 'rb') as image:
            while True:
                chunk = image.read(READ_BYTES)
                if not chunk:
                    break
                chunkEnd = image.tell()
                window = carry + chunk
                windowStart = chunkEnd - len(window)

                hit = window.find(magic)
                while hit != -1:
                    magicAt = windowStart + hit
                    print("found magic number in read block: {0} ({1} bytes per block)"
                          .format(chunkEnd, READ_BYTES))
                    print("offset in image: {0}".format(magicAt))
                    print("hex offset: {0}".format(hex(magicAt)))
                    print(_hexDump(chunk))

                    recordStart = magicAt - magicOffset
                    if recordStart < 0:
                        # The record would begin before the start of the
                        # image, so this hit cannot be a real record.
                        print("skipping hit: {0} would start before the image"
                              .format(label))
                    else:
                        image.seek(recordStart)
                        raw = image.read(recordSize)
                        print("{0} data:".format(label))
                        print(_hexDump(raw))
                        record = recordClass(
                            _splitFields(raw, offsets, recordSize))
                        for name in recordClass.FIELDS:
                            print("{0} : {1}".format(
                                name, _hexDump(getattr(record, name))))
                        found.append(record)
                    hit = window.find(magic, hit + 1)

                # Reading records moved the file position; go back to the end
                # of the chunk so the sequential scan neither skips nor
                # re-reads data.
                image.seek(chunkEnd)
                carry = window[-overlap:] if overlap else b''


class Superblock(object):
    """Raw fields of one ext4 superblock candidate.

    Each attribute named in FIELDS holds the raw bytes of the corresponding
    slice of the record, or None when the object is built without arguments.
    """

    # Attribute names in on-disk order; one per entry of SUPERBLOCK_OFFSETS.
    FIELDS = (
        'inodeCount', 'blockCount', 'reservedBlockCount', 'freeBlockCount',
        'freeInodeCount', 'firstDataBlock', 'logBlockSize', 'logClusterSize',
        'blocksPerGroup', 'clustersPerGroup', 'inodesPerGroup', 'mountTime',
        'writeTime', 'mountCount', 'maxMountCount', 'magic', 'state',
        'errors', 'minorRevisionLevel', 'lastCheck', 'checkInterveal',
        'creatorOS', 'revisionLevel', 'reservedBlocksUID',
        'reservedBlocksDefaultGID', 'firstNonReservedInode', 'inodeSize',
        'blockGroup', 'compatibleFeatures', 'incompatibleFeatures',
        'readOnlyCompatibleFeatures', 'uuid', 'label', 'lastMounted',
        'compression', 'preallocatedFileBlocks',
        'preallocatedDirectoryBlocks', 'reservedGDTBlocks', 'journalUUID',
        'journalInodeNumber', 'journalFileDeviceNumber', 'lastOrphan',
        'hashSeed', 'hashVersion', 'journalBackupType',
        'groupDescriptorSize', 'mountOptionsDefault',
        'firstMetablockBlockGroup', 'makeFileSystemTime',
        'journalInodesBackup', 'blockCountHigh', 'reserverdBlockCountHigh',
        'freeBlockCountHigh', 'minimumInodeSize', 'newInodeReservationSize',
        'miscFlags', 'raidStride', 'multiMountPreventionInterval',
        'multiMountPreventionData', 'raidStripeWidth',
        'flexibleBlockGroupSize', 'metadataChecksumAlgorithmType',
        'reservedPad', 'kilobytesWritten', 'snapshotInodeNumber',
        'snapshotID', 'snapshotReservedBlockCount', 'snapshotList',
        'errorCount', 'firstErrorTime', 'firstErrorInode', 'firstErrorBlock',
        'firstErrorFunction', 'firstErrorLine', 'lastErrorTime',
        'lastErrorInode', 'lastErrorLine', 'lastErrorBlock',
        'lastErrorFunction', 'mountOptions', 'inodeOfUserQuotaFile',
        'infodeOfGroupQuotaFile', 'overheadBlocks', 'superblockBackups',
        'encryptionAlgorithms', 'encryptionSalt', 'inodeLostAndFound',
        'inodeProjectQuota', 'checksumSeed', 'wtimeHigh', 'mtimeHigh',
        'makeFileSystemTimeHigh', 'lastCheckHigh', 'firstErrorTimeHigh',
        'lastErrorTimeHigh', 'zeroPadding', 'encoding', 'encodingFlags',
        'reservedPadding', 'checksum',
    )

    def __init__(self, args=None):
        """Fill the attributes positionally from *args*, or with None if empty.

        Raises:
            IndexError: *args* is non-empty but shorter than FIELDS.
        """
        _assignFields(self, self.FIELDS, args)


class Inode(object):
    """Raw fields of one ext4 inode candidate.

    Each attribute named in FIELDS holds the raw bytes of the corresponding
    slice of the record, or None when the object is built without arguments.
    """

    # Attribute names in on-disk order; one per entry of INODE_OFFSETS.
    # 'extraCreationTime' (offset 0x94) was missing from the original list,
    # which pushed the wrong slices into versionHigh and projectID.
    FIELDS = (
        'fileMode', 'uidLow', 'sizeLow', 'accessTime', 'changeTime',
        'modificationTime', 'deletionTime', 'gidLow', 'linkCount',
        'blockCountLow', 'flags', 'osd1', 'blockMap', 'fileVersion',
        'extendedAttributeBlockLow', 'fileDirectorySizeHigh',
        'fragmentAddress', 'osd2', 'extraSize', 'checksumHigh',
        'extraChangeTime', 'extraModificationTime', 'extraAccessTime',
        'creationTime', 'extraCreationTime', 'versionHigh', 'projectID',
    )

    def __init__(self, args=None):
        """Fill the attributes positionally from *args*, or with None if empty.

        Raises:
            IndexError: *args* is non-empty but shorter than FIELDS.
        """
        _assignFields(self, self.FIELDS, args)

    def printSize(self):
        """Print the file size built from the low and high size fields.

        NOTE(review): the original method had no body.  This implementation
        assumes it was meant to report the size stored in the inode
        (sizeLow as the low 32 bits, fileDirectorySizeHigh as the high
        32 bits) -- confirm against the intended use.
        """
        if self.sizeLow is None or self.fileDirectorySizeHigh is None:
            print("size: unknown")
            return
        size = ((_littleEndianInt(self.fileDirectorySizeHigh) << 32)
                | _littleEndianInt(self.sizeLow))
        print("size: {0} bytes".format(size))


def main(argv=None):
    """Scan the image named on the command line for inode candidates.

    Args:
        argv: argument vector; defaults to sys.argv.

    Returns:
        Process exit status: 0 on success, 2 when no image path was given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write("usage: {0} IMAGE\n".format(argv[0] if argv else "scan"))
        return 2
    p = Partition(argv[1])
    # p.findSuperblock() is available too; the original script only ran the
    # inode scan.
    p.findInodes()
    return 0


# Guarded so that importing the module does not start a scan.
if __name__ == "__main__":
    sys.exit(main())