PowerPC {32, 64}-bit Block Trampolines (#272)
* Implement PowerPC block trampoline

* Adjust pagesize on ppc64

* Skip UnexpectedException test for PowerPC

* Move PAGE_SIZE to asmconstants.h

* Use PAGE_SIZE and PAGE_SHIFT macros for PowerPC

* Add ppc64el and powerpc qemu-crossbuild targets

* Add NO_SAFE_CACHING definition and guards

* Do not export objc_method_cache_version on ppc32

---------

Co-authored-by: David Chisnall <davidchisnall@users.noreply.github.com>
hmelder and davidchisnall committed Feb 12, 2024
1 parent 1ff5e12 commit e882423
Showing 10 changed files with 105 additions and 9 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/main.yml
@@ -88,6 +88,10 @@ jobs:
system-processor: riscv64
triple: riscv64-linux-gnu
rtld: ld-linux-riscv64-lp64d.so.1
- name: ppc64el
system-processor: powerpc64le
triple: powerpc64le-linux-gnu
rtld: ld64.so.2
# lld versions prior to 15 do not support R_RISCV_ALIGN relocations
exclude:
- llvm-version: 13
@@ -108,7 +112,7 @@ jobs:
sudo apt install libstdc++-9-dev-${{ matrix.arch.name }}-cross qemu-user ninja-build
- name: Configure CMake
run: |
export LDFLAGS="-L/usr/lib/llvm-${{ matrix.llvm-version }}/lib/ -fuse-ld=lld -Wl,--dynamic-linker=/usr/${{ matrix.arch.triple }}/lib/${{ matrix.arch.rtld }},-rpath,/usr/${{ matrix.arch.triple }}/lib"
export LDFLAGS="-L/usr/lib/llvm-${{ matrix.llvm-version }}/lib/ -fuse-ld=lld-${{ matrix.llvm-version}} -Wl,--dynamic-linker=/usr/${{ matrix.arch.triple }}/lib/${{ matrix.arch.rtld }},-rpath,/usr/${{ matrix.arch.triple }}/lib"
cmake -B ${{github.workspace}}/build \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=${{ matrix.arch.system-processor }} \
4 changes: 4 additions & 0 deletions CMake/detect_arch.c
@@ -7,6 +7,10 @@
#error i386
#elif defined(__x86_64__)
#error x86_64
#elif defined(__powerpc64__)
#error powerpc64
#elif defined(__powerpc__)
#error powerpc
#else
#error unknown
#endif
10 changes: 9 additions & 1 deletion CMakeLists.txt
@@ -51,7 +51,7 @@ try_compile(
)

if(NOT COMPILE_SUCCESS)
string(REGEX MATCH "(aarch64|arm|i386|x86_64|unknown)" ARCHITECTURE ${COMPILE_OUTPUT})
string(REGEX MATCH "(aarch64|arm|i386|x86_64|powerpc64|powerpc|unknown)" ARCHITECTURE ${COMPILE_OUTPUT})
endif()

set(ARCHITECTURE ${ARCHITECTURE} CACHE STRING "Architecture Type")
@@ -187,6 +187,14 @@ set(INCLUDE_DIRECTORY "objc" CACHE STRING

add_compile_options($<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},i686>:-march=i586>)

# PowerPC 32-bit does not support native 64-bit atomic operations,
# which are used in safe caching.
# You must also update the guard in objc/runtime.h when updating
# this macro.
if (ARCHITECTURE STREQUAL "powerpc")
add_definitions(-DNO_SAFE_CACHING)
endif()

set(INSTALL_TARGETS objc)

if(WIN32)
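The NO_SAFE_CACHING guard above keys off the architecture name. As a purely illustrative alternative (not part of this commit), a configure-time probe could test for lock-free 64-bit atomics directly using the GCC/Clang __atomic_always_lock_free builtin; the file name detect_atomics.c is hypothetical.

/* detect_atomics.c -- hypothetical probe, not part of this commit.
 * Exits 0 only when 8-byte atomics are lock-free on the target, which is
 * the property safe caching depends on. */
#include <stdint.h>

int main(void)
{
    return __atomic_always_lock_free(sizeof(uint64_t), 0) ? 0 : 1;
}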
2 changes: 1 addition & 1 deletion Test/UnexpectedException.m
@@ -31,7 +31,7 @@ LONG WINAPI _UnhandledExceptionFilter(struct _EXCEPTION_POINTERS* exceptionInfo)

int main(void)
{
#if !(defined(__arm__) || defined(__ARM_ARCH_ISA_A64))
#if !(defined(__arm__) || defined(__ARM_ARCH_ISA_A64)) && !defined(__powerpc__)
#if defined(_WIN32) && !defined(__MINGW32__)
// also verify that an existing handler still gets called after we set ours
SetUnhandledExceptionFilter(&_UnhandledExceptionFilter);
8 changes: 8 additions & 0 deletions asmconstants.h
@@ -19,3 +19,11 @@
#define SLOT_OFFSET 0
#endif
#define SMALLOBJ_MASK ((1<<SMALLOBJ_BITS) - 1)

// Page size configuration
#if defined(__powerpc64__)
# define PAGE_SHIFT 16
#else
# define PAGE_SHIFT 12
#endif
#define PAGE_SIZE (1<<PAGE_SHIFT)
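PAGE_SHIFT is fixed at compile time: 16 on powerpc64 (64 KiB pages) and 12 everywhere else. Below is a minimal sketch of a runtime sanity check against the kernel's reported page size, assuming only POSIX sysconf; it is illustrative and not part of this commit.

/* Illustrative only: confirm the compile-time page-size assumption. */
#include <assert.h>
#include <unistd.h>
#include "asmconstants.h"

static void check_page_size(void)
{
    assert((long)PAGE_SIZE == sysconf(_SC_PAGESIZE));
}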
15 changes: 10 additions & 5 deletions block_to_imp.c
@@ -20,6 +20,7 @@
#include "blocks_runtime.h"
#include "lock.h"
#include "visibility.h"
#include "asmconstants.h" // For PAGE_SIZE

#ifndef __has_builtin
#define __has_builtin(x) 0
@@ -95,22 +96,26 @@ static int mprotect(void *buffer, size_t len, int prot)
# endif
#endif

#define PAGE_SIZE 4096

struct block_header
{
void *block;
void(*fnptr)(void);
/**
* On 64-bit platforms, we have 16 bytes for instructions, which ought to
* be enough without padding. On MIPS, we need
* be enough without padding.
* Note: If we add too much padding, then we waste space but have no other
* ill effects. If we get this too small, then the assert in
* `init_trampolines` will fire on library load.
*
* PowerPC: We need INSTR_CNT * INSTR_LEN = 7*4 = 28 bytes
* for instructions. sizeof(block_header) must be a divisor of
* PAGE_SIZE, so we need to pad block_header to 32 bytes.
* On PowerPC 64-bit, where sizeof(void *) = 8 bytes, we
* add 16 bytes of padding.
*/
#if defined(__i386__) || (defined(__mips__) && !defined(__mips_n64))
#if defined(__i386__) || (defined(__mips__) && !defined(__mips_n64)) || (defined(__powerpc__) && !defined(__powerpc64__))
uint64_t padding[3];
#elif defined(__mips__)
#elif defined(__mips__) || defined(__powerpc64__)
uint64_t padding[2];
#elif defined(__arm__)
uint64_t padding;
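The padding rule described in the comment above can be written as a compile-time check. This is only a sketch of the invariant (the runtime enforces it with an assert in init_trampolines); it assumes the struct block_header and PAGE_SIZE definitions shown in this diff.

/* Sketch only: each trampoline slot sits exactly one page after its
 * header, so the header size must tile the page without remainder.
 * ppc32: 4 + 4 + 3*8 = 32 bytes; ppc64: 8 + 8 + 2*8 = 32 bytes. */
_Static_assert((PAGE_SIZE % sizeof(struct block_header)) == 0,
               "block_header must evenly divide PAGE_SIZE");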
40 changes: 40 additions & 0 deletions block_trampolines.S
@@ -1,4 +1,5 @@
#include "common.S"
#include "asmconstants.h"

#
# This file defines some trampolines for calling blocks. A block function
@@ -98,6 +99,45 @@
#define ARG1 $a1
#define ARG2 $a2

#elif defined(__powerpc__)
////////////////////////////////////////////////////////////////////////////////
// PowerPC trampoline
////////////////////////////////////////////////////////////////////////////////

#if defined(__powerpc64__)
#define LOAD ld
#define OFFSET 8
#else
#define LOAD lwz
#define OFFSET 4
#endif

.macro trampoline arg0, arg1
mfctr %r12 # The block trampoline is always called
# via a function pointer. We can thus
# assume that ctr contains the trampoline
# entry point address from the previous
# branch to this trampoline (bctrl).

#if PAGE_SHIFT < 16
addi %r12, %r12, -PAGE_SIZE # Subtract page size from entry point
#else
addis %r12, %r12, (-0x1 << (PAGE_SHIFT - 16))
#endif

mr \arg1, \arg0
LOAD \arg0, 0(%r12)
LOAD %r12, OFFSET(%r12)
mtctr %r12 # Move block function pointer into ctr
bctr # Branch to block function
.endm

#define ARG0 %r3
#define ARG1 %r4
#define ARG2 %r5
#define SARG0 ARG1
#define SARG1 ARG2

#elif defined(__riscv) && (__riscv_xlen == 64)
////////////////////////////////////////////////////////////////////////////////
// RISC-V trampoline
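The mfctr and PAGE_SIZE arithmetic above depends on the layout that block_to_imp.c creates: each trampoline's block_header lives exactly one page below the trampoline's entry point. The C fragment below is a hedged sketch of that relationship, assuming the definitions from this commit; header_for_trampoline is a hypothetical helper, not a function in the runtime.

/* Sketch only: a trampoline entered at address `entry` finds its data by
 * stepping back one page, which is what the PowerPC code does with mfctr
 * followed by the addi/addis adjustment. */
#include <stdint.h>

static inline struct block_header *header_for_trampoline(void *entry)
{
    return (struct block_header *)((uintptr_t)entry - PAGE_SIZE);
}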
8 changes: 7 additions & 1 deletion dtable.c
@@ -43,7 +43,9 @@ PRIVATE mutex_t initialize_lock;
* 2^x in increments of 8. */
static uint32_t dtable_depth = 8;

#ifndef NO_SAFE_CACHING
_Atomic(uint64_t) objc_method_cache_version;
#endif

/**
* Starting at `cls`, finds the class that provides the implementation of the
@@ -404,7 +406,9 @@ static BOOL installMethodInDtable(Class class,
// Invalidate the old slot, if there is one.
if (NULL != oldMethod)
{
#ifndef NO_SAFE_CACHING
objc_method_cache_version++;
#endif
}
return YES;
}
@@ -520,7 +524,9 @@ PRIVATE void objc_update_dtable_for_new_superclass(Class cls, Class newSuper)
LOCK_RUNTIME_FOR_SCOPE();
rebaseDtableRecursive(cls, newSuper);
// Invalidate all caches after this operation.
objc_method_cache_version++;
#ifndef NO_SAFE_CACHING
objc_method_cache_version++;
#endif

return;
}
6 changes: 6 additions & 0 deletions objc/slot.h
@@ -27,8 +27,14 @@ struct objc_slot2
* A counter that is incremented whenever one or more cached slots become
* invalid, for example if a subclass loads a category containing methods that
* were inherited from the superclass.
*
* Caching is disabled on targets without native 64-bit atomic support,
* such as 32-bit PowerPC.
*/
#if defined(__powerpc__) && !defined(__powerpc64__)
#else
OBJC_PUBLIC extern _Atomic(uint64_t) objc_method_cache_version;
#endif

/**
* Legacy cache structure. This is no longer maintained in the runtime and is
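For context, here is a hedged sketch of how a call site is assumed to use the version counter: a cached slot is reused only while the recorded version is non-zero and still matches the global counter, which is why NO_SAFE_CACHING builds and uncacheable slots always report version 0. The per-call-site cache variables and lookup_with_cache are hypothetical, not part of the runtime.

/* Sketch only, assuming the objc_slot_lookup_version and
 * objc_method_cache_version declarations from this commit. */
static struct objc_slot2 *cached_slot;
static uint64_t cached_version;

static IMP lookup_with_cache(id *receiver, SEL selector)
{
    if (cached_slot == NULL || cached_version == 0 ||
        cached_version != objc_method_cache_version)
    {
        cached_slot = objc_slot_lookup_version(receiver, selector,
                                               &cached_version);
    }
    return cached_slot->imp;
}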
15 changes: 15 additions & 0 deletions sendmsg2.c
@@ -93,7 +93,12 @@ struct objc_slot2 *objc_msg_lookup_internal(id *receiver, SEL selector, uint64_t
{
if (version)
{
#ifdef NO_SAFE_CACHING
// Always write 0 to version, marking the slot as uncacheable.
*version = 0;
#else
*version = objc_method_cache_version;
#endif
}
Class class = classForObject((*receiver));
retry:;
@@ -118,10 +123,12 @@
{
if ((result = objc_dtable_lookup(dtable, get_untyped_idx(selector))))
{
#ifndef NO_SAFE_CACHING
if (version)
{
*version = 0;
}
#endif
uncacheable_slot.imp = call_mismatch_hook(class, selector, result);
result = (struct objc_slot2*)&uncacheable_slot;
}
@@ -135,10 +142,12 @@
}
if (0 == result)
{
#ifndef NO_SAFE_CACHING
if (version)
{
*version = 0;
}
#endif
uncacheable_slot.imp = __objc_msg_forward2(*receiver, selector);
result = (struct objc_slot2*)&uncacheable_slot;
}
@@ -236,10 +245,12 @@ struct objc_slot2 *objc_slot_lookup_version(id *receiver, SEL selector, uint64_t
// inlined trivially.
if (UNLIKELY(*receiver == nil))
{
#ifndef NO_SAFE_CACHING
if (version)
{
*version = 0;
}
#endif
// Return the correct kind of zero, depending on the type encoding.
if (selector->types)
{
@@ -350,10 +361,12 @@ struct objc_slot *objc_slot_lookup_super(struct objc_super *super, SEL selector)
*/
struct objc_slot2 *objc_get_slot2(Class cls, SEL selector, uint64_t *version)
{
#ifndef NO_SAFE_CACHING
if (version)
{
*version = objc_method_cache_version;
}
#endif
struct objc_slot2 * result = objc_dtable_lookup(cls->dtable, selector->index);
if (0 == result)
{
@@ -374,10 +387,12 @@ struct objc_slot2 *objc_get_slot2(Class cls, SEL selector, uint64_t *version)
{
if ((result = objc_dtable_lookup(dtable, get_untyped_idx(selector))))
{
#ifndef NO_SAFE_CACHING
if (version)
{
*version = 0;
}
#endif
uncacheable_slot.imp = call_mismatch_hook(cls, selector, result);
result = (struct objc_slot2*)&uncacheable_slot;
}