From e02628535dfa295478b4be677f84e2eb4eccec23 Mon Sep 17 00:00:00 2001 From: bjornstahl Date: Thu, 2 Nov 2023 00:10:11 +0100 Subject: [PATCH] (core) don't force adopt handler ack on ep-create Fun one. The flow for appl switching with adoption as well as with crash recovery first runs the application entry point then the adopt handler. This is deliberate so that window/resource management code doesn't suffer from initialization order problems. The side effect from that is that if a frameserver connection is established in the entry point, it too will be exposed in the adoption handler. This has mostly been relevant for durden, but it has had a deferred / reopen on termination scheme that masked the issue. When testing a12- based hot network reloading with the @stdin bit, the net_open call does create a frameserver for the monitoring parent to connect through in order for shmif event mapping to be less painful. This had the side effect of getting pruned immediately through the missing _adopt handler, uncovering the issue. The solution as it stands is to track which reset- counter a frameserver was created and registered in. If they are the same it means that it was actually created in the main entry point and the script can be assumed having proper tracking for it. --- src/engine/arcan_conductor.c | 9 +++++++++ src/engine/arcan_conductor.h | 2 ++ src/engine/arcan_frameserver.h | 10 ++++++++++ src/engine/arcan_lua.c | 7 +++++++ src/engine/arcan_main.c | 3 +++ 5 files changed, 31 insertions(+) diff --git a/src/engine/arcan_conductor.c b/src/engine/arcan_conductor.c index f3c8c19be..a85dd0573 100644 --- a/src/engine/arcan_conductor.c +++ b/src/engine/arcan_conductor.c @@ -14,6 +14,7 @@ /* defined in platform.h, used in psep open, shared memory */ _Atomic uint64_t* volatile arcan_watchdog_ping = NULL; static size_t gpu_lock_bitmap; +static int reset_counter; void arcan_conductor_enable_watchdog() { @@ -251,6 +252,7 @@ void arcan_conductor_register_frameserver(struct arcan_frameserver* fsrv) { size_t dst_i = 0; alloc_frameserver_struct(); + fsrv->desc.recovery_tick = reset_counter; /* safeguard */ ssize_t src_i = find_frameserver(fsrv); @@ -591,6 +593,13 @@ bool arcan_conductor_valid_cycle() return valid_cycle; } +int arcan_conductor_reset_count(bool step) +{ + if (step) + reset_counter++; + return reset_counter; +} + static int trigger_video_synch(float frag) { conductor.set_deadline = -1; diff --git a/src/engine/arcan_conductor.h b/src/engine/arcan_conductor.h index 23b8f76d0..8f91509eb 100644 --- a/src/engine/arcan_conductor.h +++ b/src/engine/arcan_conductor.h @@ -117,6 +117,8 @@ void arcan_conductor_setsynch(const char* arg); void arcan_conductor_fakesynch(uint8_t left_ms); #ifndef VIDEO_PLATFORM_IMPL +int arcan_conductor_reset_count(bool step); + /* Update the priority target to match the specified frameserver. This * means that heuristics driving synchronization will be biased towards * letting the specific fsrv align synchronization - if the synchronization diff --git a/src/engine/arcan_frameserver.h b/src/engine/arcan_frameserver.h index c7362250b..d8bf09a33 100644 --- a/src/engine/arcan_frameserver.h +++ b/src/engine/arcan_frameserver.h @@ -93,6 +93,16 @@ struct arcan_frameserver_meta { unsigned long long framecount; unsigned long long dropcount; unsigned long long lastpts; + +/* This one was added to have a way to mark objects that were created in the + * main entrypoint of the lua scripts (that run before _adopt) but in recovery + * would still be exposed to adopt. If the developer would reject it there + * (from not implementing the handler or doing it in a contrived way) the + * frameserver would be immediately destroyed, causing a fs- race condition. + * + * This counter indicates which recovery- window the object was created in, + * so that the automatic deletion facility does not kill it off prematurely. */ + unsigned recovery_tick; }; struct frameserver_audsrc { diff --git a/src/engine/arcan_lua.c b/src/engine/arcan_lua.c index c4791259d..e173f8173 100644 --- a/src/engine/arcan_lua.c +++ b/src/engine/arcan_lua.c @@ -711,6 +711,7 @@ void arcan_lua_adopt(struct arcan_luactx* ctx) arcan_vobj_id delids[n_fsrv]; size_t delcount = 0; + int tc = arcan_conductor_reset_count(false); /* three: forward to adopt function (or delete) */ for (count = 0; count < n_fsrv; count++){ @@ -722,6 +723,12 @@ void arcan_lua_adopt(struct arcan_luactx* ctx) fsrv->tag = LUA_NOREF; bool delete = true; + +/* use this to determine if the frameserver was created in the appl entry + * and thus should not be exposed as an adopt handler */ + if (fsrv->desc.recovery_tick == tc) + continue; + if (alt_lookup_entry(ctx, "adopt", sizeof("adopt") - 1) && arcan_video_getobject(ids[count]) != NULL){ lua_pushvid(ctx, vobj->cellid); diff --git a/src/engine/arcan_main.c b/src/engine/arcan_main.c index e6aba1037..a749830a1 100644 --- a/src/engine/arcan_main.c +++ b/src/engine/arcan_main.c @@ -590,6 +590,7 @@ int MAIN_REDIR(int argc, char* argv[]) * invoke adopt() in the script */ bool adopt = false, in_recover = false; + int jumpcode = setjmp(arcanmain_recover_state); int saved, truncated; @@ -599,6 +600,7 @@ int MAIN_REDIR(int argc, char* argv[]) /* close down the database and reinitialize with the name of the new appl */ arcan_db_close(&dbhandle); arcan_db_set_shared(NULL); + arcan_conductor_reset_count(true); dbhandle = arcan_db_open(dbfname, arcan_appl_id()); if (!dbhandle) @@ -669,6 +671,7 @@ int MAIN_REDIR(int argc, char* argv[]) goto error; } + arcan_conductor_reset_count(true); arcan_event_maskall(evctx); arcan_video_recoverexternal(true, &saved, &truncated, NULL, NULL); arcan_event_clearmask(evctx);