diff --git a/blocks/Block.cs b/blocks/Block.cs
index 665c94fe..5f0d449b 100644
--- a/blocks/Block.cs
+++ b/blocks/Block.cs
@@ -105,7 +105,8 @@ namespace de4dot.blocks {
 		}
 
 		public void remove(IEnumerable<int> indexes) {
-			var instrsToDelete = new List<int>(indexes);
+			// Drop duplicate indexes before sorting them for deletion.
+			var instrsToDelete = new List<int>(Utils.unique(indexes));
 			instrsToDelete.Sort();
 			instrsToDelete.Reverse();
 			foreach (var index in instrsToDelete)
diff --git a/blocks/Utils.cs b/blocks/Utils.cs
index 23709272..da1d63ca 100644
--- a/blocks/Utils.cs
+++ b/blocks/Utils.cs
@@ -41,5 +41,15 @@ namespace de4dot.blocks {
 				olist.Add(l);
 			return olist;
 		}
+
+		// Returns the distinct elements of values. The order of the result is
+		// whatever Dictionary.Keys yields, so callers must not rely on it.
+		public static IEnumerable<T> unique<T>(IEnumerable<T> values) {
+			// HashSet is only available in .NET 3.5 and later.
+			var dict = new Dictionary<T, bool>();
+			foreach (var val in values)
+				dict[val] = true;
+			return dict.Keys;
+		}
 	}
 }
diff --git a/blocks/blocks.csproj b/blocks/blocks.csproj
index 5f5137a6..fb0ad203 100644
--- a/blocks/blocks.csproj
+++ b/blocks/blocks.csproj
@@ -37,6 +37,7 @@
+
diff --git a/blocks/cflow/BlocksCflowDeobfuscator.cs b/blocks/cflow/BlocksCflowDeobfuscator.cs
index 749e4ee9..0c1937a3 100644
--- a/blocks/cflow/BlocksCflowDeobfuscator.cs
+++ b/blocks/cflow/BlocksCflowDeobfuscator.cs
@@ -38,6 +38,7 @@ namespace de4dot.blocks.cflow {
 		public void deobfuscate() {
 			var allBlocks = new List<Block>();
 			var switchCflowDeobfuscator = new SwitchCflowDeobfuscator();
+			var deadCodeRemover = new DeadCodeRemover();
 			bool changed;
 			do {
 				changed = false;
@@ -57,6 +58,9 @@ namespace de4dot.blocks.cflow {
 
 				switchCflowDeobfuscator.init(blocks, allBlocks);
 				changed |= switchCflowDeobfuscator.deobfuscate();
+
+				deadCodeRemover.init(allBlocks);
+				changed |= deadCodeRemover.remove();
 			} while (changed);
 		}
 
diff --git a/blocks/cflow/DeadCodeRemover.cs b/blocks/cflow/DeadCodeRemover.cs
new file mode 100644
index 00000000..96b6c6d1
--- /dev/null
+++ b/blocks/cflow/DeadCodeRemover.cs
@@ -0,0 +1,397 @@
+/*
+    Copyright (C) 2011 de4dot@gmail.com
+
+    This file is part of de4dot.
+
+    de4dot is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    de4dot is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with de4dot.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+using System.Collections.Generic;
+using Mono.Cecil.Cil;
+
+namespace de4dot.blocks.cflow {
+	// Removes instruction sequences whose only result is discarded by a pop,
+	// provided every instruction in the sequence passes okInstructions().
+	class DeadCodeRemover {
+		List<Block> allBlocks;
+		readonly List<int> allDeadInstructions = new List<int>();
+		readonly InstructionExpressionFinder instructionExpressionFinder = new InstructionExpressionFinder();
+
+		// Sets the blocks that the next call to remove() will scan.
+		public void init(List<Block> allBlocks) {
+			this.allBlocks = allBlocks;
+		}
+
+		// Scans every block passed to init(). Returns true if at least one
+		// instruction was removed from any of them.
+		public bool remove() {
+			bool changed = false;
+
+			foreach (var block in allBlocks)
+				changed |= remove(block);
+
+			return changed;
+		}
+
+		// Collects, for every pop in the block, the instructions that produce the
+		// popped value and deletes them together with the pop. Returns true if
+		// anything was deleted.
+		bool remove(Block block) {
+			allDeadInstructions.Clear();
+
+			var instructions = block.Instructions;
+			for (int i = 0; i < instructions.Count; i++) {
+				if (instructions[i].OpCode.Code != Code.Pop)
+					continue;
+
+				// init() also clears the finder's previous result.
+				instructionExpressionFinder.init(block, false);
+				if (!instructionExpressionFinder.find(i))
+					continue;
+				if (!okInstructions(block, instructionExpressionFinder.DeadInstructions))
+					continue;
+				allDeadInstructions.AddRange(instructionExpressionFinder.DeadInstructions);
+			}
+
+			if (allDeadInstructions.Count == 0)
+				return false;
+
+			// Block.remove() drops duplicate indexes before deleting.
+			block.remove(allDeadInstructions);
+			return true;
+		}
+
+		// Returns true if every instruction in indexes is on the list of opcodes
+		// that may be deleted. Calls, stores and any opcode not listed here make
+		// the whole sequence ineligible.
+		bool okInstructions(Block block, IEnumerable<int> indexes) {
+			foreach (var index in indexes) {
+				var instr = block.Instructions[index];
+				switch (instr.OpCode.Code) {
+				// Opcodes that may appear in a removable sequence.
+				case Code.Add:
+				case Code.Add_Ovf:
+				case Code.Add_Ovf_Un:
+				case Code.And:
+				case Code.Arglist:
+				case Code.Beq:
+				case Code.Beq_S:
+				case Code.Bge:
+				case Code.Bge_S:
+				case Code.Bge_Un:
+				case Code.Bge_Un_S:
+				case Code.Bgt:
+				case Code.Bgt_S:
+				case Code.Bgt_Un:
+				case Code.Bgt_Un_S:
+				case Code.Ble:
+				case Code.Ble_S:
+				case Code.Ble_Un:
+				case Code.Ble_Un_S:
+				case Code.Blt:
+				case Code.Blt_S:
+				case Code.Blt_Un:
+				case Code.Blt_Un_S:
+				case Code.Bne_Un:
+				case Code.Bne_Un_S:
+				case Code.Box:
+				case Code.Br:
+				case Code.Br_S:
+				case Code.Break:
+				case Code.Brfalse:
+				case Code.Brfalse_S:
+				case Code.Brtrue:
+				case Code.Brtrue_S:
+				case Code.Castclass:
+				case Code.Ceq:
+				case Code.Cgt:
+				case Code.Cgt_Un:
+				case Code.Ckfinite:
+				case Code.Clt:
+				case Code.Clt_Un:
+				case Code.Constrained:
+				case Code.Conv_I:
+				case Code.Conv_I1:
+				case Code.Conv_I2:
+				case Code.Conv_I4:
+				case Code.Conv_I8:
+				case Code.Conv_Ovf_I:
+				case Code.Conv_Ovf_I1:
+				case Code.Conv_Ovf_I1_Un:
+				case Code.Conv_Ovf_I2:
+				case Code.Conv_Ovf_I2_Un:
+				case Code.Conv_Ovf_I4:
+				case Code.Conv_Ovf_I4_Un:
+				case Code.Conv_Ovf_I8:
+				case Code.Conv_Ovf_I8_Un:
+				case Code.Conv_Ovf_I_Un:
+				case Code.Conv_Ovf_U:
+				case Code.Conv_Ovf_U1:
+				case Code.Conv_Ovf_U1_Un:
+				case Code.Conv_Ovf_U2:
+				case Code.Conv_Ovf_U2_Un:
+				case Code.Conv_Ovf_U4:
+				case Code.Conv_Ovf_U4_Un:
+				case Code.Conv_Ovf_U8:
+				case Code.Conv_Ovf_U8_Un:
+				case Code.Conv_Ovf_U_Un:
+				case Code.Conv_R4:
+				case Code.Conv_R8:
+				case Code.Conv_R_Un:
+				case Code.Conv_U:
+				case Code.Conv_U1:
+				case Code.Conv_U2:
+				case Code.Conv_U4:
+				case Code.Conv_U8:
+				case Code.Div:
+				case Code.Div_Un:
+				case Code.Dup:
+				case Code.Endfilter:
+				case Code.Endfinally:
+				case Code.Isinst:
+				case Code.Jmp:
+				case Code.Ldarg:
+				case Code.Ldarg_0:
+				case Code.Ldarg_1:
+				case Code.Ldarg_2:
+				case Code.Ldarg_3:
+				case Code.Ldarg_S:
+				case Code.Ldarga:
+				case Code.Ldarga_S:
+				case Code.Ldc_I4:
+				case Code.Ldc_I4_0:
+				case Code.Ldc_I4_1:
+				case Code.Ldc_I4_2:
+				case Code.Ldc_I4_3:
+				case Code.Ldc_I4_4:
+				case Code.Ldc_I4_5:
+				case Code.Ldc_I4_6:
+				case Code.Ldc_I4_7:
+				case Code.Ldc_I4_8:
+				case Code.Ldc_I4_M1:
+				case Code.Ldc_I4_S:
+				case Code.Ldc_I8:
+				case Code.Ldc_R4:
+				case Code.Ldc_R8:
+				case Code.Ldelem_Any:
+				case Code.Ldelem_I:
+				case Code.Ldelem_I1:
+				case Code.Ldelem_I2:
+				case Code.Ldelem_I4:
+				case Code.Ldelem_I8:
+				case Code.Ldelem_R4:
+				case Code.Ldelem_R8:
+				case Code.Ldelem_Ref:
+				case Code.Ldelem_U1:
+				case Code.Ldelem_U2:
+				case Code.Ldelem_U4:
+				case Code.Ldelema:
+				case Code.Ldfld:
+				case Code.Ldflda:
+				case Code.Ldftn:
+				case Code.Ldind_I:
+				case Code.Ldind_I1:
+				case Code.Ldind_I2:
+				case Code.Ldind_I4:
+				case Code.Ldind_I8:
+				case Code.Ldind_R4:
+				case Code.Ldind_R8:
+				case Code.Ldind_Ref:
+				case Code.Ldind_U1:
+				case Code.Ldind_U2:
+				case Code.Ldind_U4:
+				case Code.Ldlen:
+				case Code.Ldloc:
+				case Code.Ldloc_0:
+				case Code.Ldloc_1:
+				case Code.Ldloc_2:
+				case Code.Ldloc_3:
+				case Code.Ldloc_S:
+				case Code.Ldloca:
+				case Code.Ldloca_S:
+				case Code.Ldnull:
+				case Code.Ldobj:
+				case Code.Ldsfld:
+				case Code.Ldsflda:
+				case Code.Ldstr:
+				case Code.Ldtoken:
+				case Code.Ldvirtftn:
+				case Code.Leave:
+				case Code.Leave_S:
+				case Code.Localloc:
+				case Code.Mkrefany:
+				case Code.Mul:
+				case Code.Mul_Ovf:
+				case Code.Mul_Ovf_Un:
+				case Code.Neg:
+				case Code.Newarr:
+				case Code.No:
+				case Code.Nop:
+				case Code.Not:
+				case Code.Or:
+				case Code.Pop:
+				case Code.Readonly:
+				case Code.Refanytype:
+				case Code.Refanyval:
+				case Code.Rem:
+				case Code.Rem_Un:
+				case Code.Ret:
+				case Code.Rethrow:
+				case Code.Shl:
+				case Code.Shr:
+				case Code.Shr_Un:
+				case Code.Sizeof:
+				case Code.Sub:
+				case Code.Sub_Ovf:
+				case Code.Sub_Ovf_Un:
+				case Code.Switch:
+				case Code.Tail:
+				case Code.Throw:
+				case Code.Unaligned:
+				case Code.Unbox:
+				case Code.Unbox_Any:
+				case Code.Volatile:
+				case Code.Xor:
+					break;
+
+				// Calls, object creation and stores must be kept. They are listed only
+				// for documentation; they share the default path with unknown opcodes.
+				case Code.Call:
+				case Code.Calli:
+				case Code.Callvirt:
+				case Code.Cpblk:
+				case Code.Cpobj:
+				case Code.Initblk:
+				case Code.Initobj:
+				case Code.Newobj:
+				case Code.Starg:
+				case Code.Starg_S:
+				case Code.Stelem_Any:
+				case Code.Stelem_I:
+				case Code.Stelem_I1:
+				case Code.Stelem_I2:
+				case Code.Stelem_I4:
+				case Code.Stelem_I8:
+				case Code.Stelem_R4:
+				case Code.Stelem_R8:
+				case Code.Stelem_Ref:
+				case Code.Stfld:
+				case Code.Stind_I:
+				case Code.Stind_I1:
+				case Code.Stind_I2:
+				case Code.Stind_I4:
+				case Code.Stind_I8:
+				case Code.Stind_R4:
+				case Code.Stind_R8:
+				case Code.Stind_Ref:
+				case Code.Stloc:
+				case Code.Stloc_0:
+				case Code.Stloc_1:
+				case Code.Stloc_2:
+				case Code.Stloc_3:
+				case Code.Stloc_S:
+				case Code.Stobj:
+				case Code.Stsfld:
+				default:
+					return false;
+				}
+			}
+
+			return true;
+		}
+
+		// Walks backwards from an instruction and records the indexes of the
+		// instructions that together build the values it pops.
+		class InstructionExpressionFinder {
+			readonly List<int> deadInstructions = new List<int>();
+			Block block;
+			bool methodHasReturnValue;
+
+			// Indexes recorded by the last find() call, in the order visited.
+			public List<int> DeadInstructions {
+				get { return deadInstructions; }
+			}
+
+			// Discards the previous result and selects the block to search.
+			public void init(Block block, bool methodHasReturnValue) {
+				deadInstructions.Clear();
+				this.block = block;
+				this.methodHasReturnValue = methodHasReturnValue;
+			}
+
+			// Returns true if a complete producing sequence was found for the
+			// instruction at index. DeadInstructions is only meaningful then.
+			public bool find(int index) {
+				return find(ref index, true);
+			}
+
+			// Recursive worker. index is updated to the lowest index visited. When
+			// addIt is false the visited instructions are balanced against the
+			// stack but not recorded as dead.
+			bool find(ref int index, bool addIt) {
+				if (index < 0)
+					return false;
+
+				if (addIt)
+					addIndex(index);
+
+				var startInstr = block.Instructions[index];
+				int startInstrPushes, startInstrPops;
+				// NOTE(review): passes false here but methodHasReturnValue below - confirm this is intended.
+				DotNetUtils.calculateStackUsage(startInstr.Instruction, false, out startInstrPushes, out startInstrPops);
+
+				if (startInstrPops == 0)
+					return true;
+				if (startInstrPops < 0)
+					return false;	// Eg. leave
+
+				while (index > 0) {
+					var instr = block.Instructions[index - 1];
+					// Everything is accounted for; only keep going to pick up prefixes.
+					if (startInstrPops == 0 && instr.OpCode.OpCodeType != OpCodeType.Prefix)
+						break;
+					index--;
+
+					int pushes, pops;
+					DotNetUtils.calculateStackUsage(instr.Instruction, methodHasReturnValue, out pushes, out pops);
+					if (pops < 0)
+						return false;	// eg. leave
+
+					if (pops > 0) {	// if instr uses any args
+						// An instruction that only consumes values is a separate
+						// statement: walk over it and its arguments without recording them.
+						bool otherExpr = pushes == 0;
+						if (!find(ref index, addIt && !otherExpr))
+							return false;
+					}
+					else {
+						if (addIt)
+							addIndex(index);
+					}
+					if (pushes > 0) {
+						if (pushes > startInstrPops)
+							return false;
+						startInstrPops -= pushes;
+					}
+				}
+
+				return startInstrPops == 0;
+			}
+
+			void addIndex(int index) {
+				deadInstructions.Add(index);
+			}
+		}
+	}
+}