/* ctojr.h -- filter CTO implementation; renumber destinations MRU This file is part of the UPX executable compressor. Copyright (C) 1996-2023 Markus Franz Xaver Johannes Oberhumer Copyright (C) 1996-2023 Laszlo Molnar Copyright (C) 2000-2023 John F. Reiser All Rights Reserved. UPX and the UCL library are free software; you can redistribute them and/or modify them under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Markus F.X.J. Oberhumer Laszlo Molnar John F. Reiser */ #if (ACC_CC_MSC && (_MSC_VER >= 1000 && _MSC_VER < 1300)) # pragma warning(disable: 4702) // W4: unreachable code #endif /************************************************************************* // filter / scan **************************************************************************/ #ifdef U //{ #define NOFILT 0 // no filter #define FNOMRU 1 // filter, but not using mru #define MRUFLT 2 // mru filter static unsigned f80_call(Filter const *f) { return (1+ (0x0f & f->id)) % 3; } static unsigned f80_jmp1(Filter const *f) { return ((1+ (0x0f & f->id)) / 3) % 3; } static unsigned f80_jcc2(Filter const *f) { return f80_jmp1(f); } #define N_MRU 32 // does not have to be a power of 2 // Adaptively remember recent destinations. static void update_mru( const unsigned jc, // destination address const int kh, // mru[kh] is slot where found unsigned mru[N_MRU], // circular buffer of most recent destinations int &hand, // mru[hand] is most recent destination int &tail // mru[tail] is beyond oldest destination ("cold cache" startup) ) { if (0 > --hand) { hand = N_MRU -1; } const unsigned t = mru[hand]; // entry which will be overwritten by jc if (0!=t) { // have seen at least N_MRU destinations mru[kh] = t; } else { // "cold cache": keep active region contiguous if (0 > --tail) { tail = N_MRU -1; } const unsigned t2 = mru[tail]; mru[tail] = 0; mru[kh] = t2; } mru[hand] = jc; } #endif //} static int F(Filter *f) { #ifdef U // filter byte *const b = f->buf; #else // scan const byte *b = f->buf; #endif const unsigned size = f->buf_len; unsigned ic, jc, kc; unsigned calls = 0, noncalls = 0, noncalls2 = 0; unsigned lastnoncall = size, lastcall = 0; unsigned wtally[3]; memset(wtally, 0, sizeof(wtally)); #ifdef U //{ const unsigned f_call = f80_call(f); const unsigned f_jmp1 = f80_jmp1(f); const unsigned f_jcc2 = f80_jcc2(f); int hand = 0, tail = 0; unsigned mru[N_MRU]; memset(&mru[0], 0, sizeof(mru)); assert(N_MRU<=256); f->n_mru = (MRUFLT==f_call || MRUFLT==f_jmp1 || MRUFLT==f_jcc2) ? N_MRU : 0; #endif //} // FIXME: We must fit into 8 MiB because we steal one bit. // find a 16 MiB large empty address space { int which; unsigned char buf[256]; memset(buf,0,256); for (ic = 0; ic < size - 5; ic++) if (CONDF(which,b,ic,lastcall) && get_le32(b+ic+1)+ic+1 >= size) { buf[b[ic+1]] |= 1; } UNUSED(which); if (getcto(f, buf) < 0) return -1; } const unsigned char cto8 = f->cto; #ifdef U const unsigned cto = (unsigned)f->cto << 24; #endif for (ic = 0; ic < size - 5; ic++) { int which; int f_on = 0; if (!CONDF(which,b,ic,lastcall)) continue; ++wtally[which]; jc = get_le32(b+ic+1)+ic+1; // try to detect 'real' calls only if (jc < size) { #ifdef U if (2==which && NOFILT!=f_jcc2) { // 6-byte Jcc // Prefix 0x0f is constant, but opcode condition 0x80..0x8f // varies. Because we store the destination (or its mru index) // in be32 big endian format, the low-addressed bytes // will tend to be constant. Swap prefix and opcode // so that constants are together for better compression. unsigned char const t = b[ic-1]; b[ic-1] = b[ic]; b[ic] = t; } // FIXME [?]: Extend to 8 bytes if "ADD ESP, byte 4*n" follows CALL. // This will require two related cto's (consecutive, or otherwise). if ((0==which && MRUFLT==f_call) || (1==which && MRUFLT==f_jmp1) || (2==which && MRUFLT==f_jcc2) ) { f_on = 1; // Recode the destination: narrower mru indices // should compress better than wider addresses. // (But not when offset of match is unlimited?) int k; for (k = 0; k < N_MRU; ++k) { int kh = hand + k; if (N_MRU <= kh) { kh -= N_MRU; } if (mru[kh] == jc) { // destination was seen recently set_be32(b+ic+1,((k<<1)|0)+cto); update_mru(jc, kh, mru, hand, tail); break; } } if (k == N_MRU) { // loop failed; jc is not in mru[] set_be32(b+ic+1,((jc<<1)|1)+cto); // Adaptively remember recent destinations. if (0 > --hand) { hand = N_MRU -1; } mru[hand] = jc; } } else if ((0==which && NOFILT!=f_call) || (1==which && NOFILT!=f_jmp1) || (2==which && NOFILT!=f_jcc2) ) { f_on = 1; set_be32(b+ic+1, jc+cto); } #endif if (f_on) { if (ic - lastnoncall < 5) { // check the last 4 bytes before this call for (kc = 4; kc; kc--) if (CONDF(which,b,ic-kc,lastcall) && b[ic-kc+1] == cto8) break; if (kc) { #ifdef U // restore original if (2==which) { // Unswap prefix and opcode for 6-byte Jcc unsigned char const t = b[ic-1]; b[ic-1] = b[ic]; b[ic] = t; } set_le32(b+ic+1,jc-ic-1); #endif if (b[ic+1] == cto8) return 1; // fail - buffer not restored lastnoncall = ic; noncalls2++; continue; } } calls++; ic += 4; lastcall = ic+1; } } else { assert(b[ic+1] != cto8); // this should not happen lastnoncall = ic; noncalls++; } } f->calls = calls; f->noncalls = noncalls; f->lastcall = lastcall; #if 0 || defined(TESTING) printf("\ncalls=%d noncalls=%d noncalls2=%d text_size=%x calltrickoffset=%x\n", calls,noncalls,noncalls2,size,cto8); printf("CALL/JMP/JCC %d %d %d\n",wtally[0],wtally[1],wtally[2]); #endif UNUSED(noncalls2); return 0; } /************************************************************************* // unfilter **************************************************************************/ #ifdef U static int U(Filter *f) { unsigned ic, jc; byte *const b = f->buf; const unsigned size5 = f->buf_len - 5; const unsigned cto = (unsigned)f->cto << 24; unsigned lastcall = 0; int hand = 0, tail = 0; const unsigned f_call = f80_call(f); const unsigned f_jmp1 = f80_jmp1(f); const unsigned f_jcc2 = f80_jcc2(f); unsigned mru[N_MRU]; memset(&mru[0], 0, sizeof(mru)); for (ic = 0; ic < size5; ic++) { int which; if (CONDU(which,b,ic,lastcall)) { unsigned f_on = 0; jc = get_be32(b+ic+1) - cto; if (b[ic+1] == f->cto) { if ((0==which && MRUFLT==f_call) || (1==which && MRUFLT==f_jmp1) || (2==which && MRUFLT==f_jcc2) ) { f_on = 1; if (1&jc) { // 1st time at this destination jc >>= 1; if (0 > --hand) { hand = N_MRU -1; } mru[hand] = jc; } else { // not 1st time at this destination jc >>= 1; if (N_MRU <= jc) { throwCompressedDataViolation(); } int kh = jc + hand; if (N_MRU <= kh) { kh -= N_MRU; } jc = mru[kh]; update_mru(jc, kh, mru, hand, tail); } set_le32(b+ic+1,jc-ic-1); } else if ((0==which && NOFILT!=f_call) || (1==which && NOFILT!=f_jmp1) || (2==which && NOFILT!=f_jcc2) ) { f_on = 1; set_le32(b+ic+1,jc-ic-1); } if (2==which && NOFILT!=f_jcc2) { // Unswap prefix and opcode for 6-byte Jcc unsigned char const t = b[ic-1]; b[ic-1] = b[ic]; b[ic] = t; } if (f_on) { f->calls++; ic += 4; f->lastcall = lastcall = ic+1; } } else f->noncalls++; } } return 0; } #endif #undef N_MRU #undef F #undef U /* vim:set ts=4 sw=4 et: */