+--- a/hw/xfree86/dri2/dri2.c
++++ b/hw/xfree86/dri2/dri2.c
+@@ -1,4 +1,4 @@
+-/*
++ /*
+ * Copyright © 2007, 2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+@@ -100,6 +100,7 @@ typedef struct _DRI2Drawable {
+ CARD64 last_swap_ust; /* ust at completion of most recent swap */
+ int swap_limit; /* for N-buffering */
+ Bool needInvalidate;
++ Bool client_created;
+ int prime_id;
+ PixmapPtr prime_slave_pixmap;
+ PixmapPtr redirectpixmap;
+@@ -114,6 +115,7 @@ typedef struct _DRI2Screen {
+ int fd;
+ unsigned int lastSequence;
+ int prime_id;
++ Bool need_async_swapbuffers_reply;
+
+ DRI2CreateBufferProcPtr CreateBuffer;
+ DRI2DestroyBufferProcPtr DestroyBuffer;
+@@ -201,6 +203,7 @@ DRI2AllocateDrawable(DrawablePtr pDraw)
+ if (pPriv == NULL)
+ return NULL;
+
++ pPriv->client_created = FALSE;
+ pPriv->dri2_screen = ds;
+ pPriv->drawable = pDraw;
+ pPriv->width = pDraw->width;
+@@ -328,6 +331,15 @@ DRI2CreateDrawable2(ClientPtr client, Dr
+ int rc;
+
+ pPriv = DRI2GetDrawable(pDraw);
++ if (pPriv && dri2_id_out == NULL && pPriv->client_created) {
++ /* We already allocated a DRI2Drawable for this drawable, the client
++ * has called into this function (so will receive invalidate events)
++ * and now we're being called by the client again (we know this because
++ * the caller doesn't care about the dri2_id). This means we don't need
++ * to allocate another one, we have nothing else to do. */
++ pPriv->prime_id = dri2_client->prime_id;
++ return Success;
++ }
+ if (pPriv == NULL)
+ pPriv = DRI2AllocateDrawable(pDraw);
+ if (pPriv == NULL)
+@@ -342,6 +354,11 @@ DRI2CreateDrawable2(ClientPtr client, Dr
+
+ if (dri2_id_out)
+ *dri2_id_out = dri2_id;
++ else {
++ /* The client has called in for the first time, it will now receive
++ * invalidate events. Record this for later. */
++ pPriv->client_created = TRUE;
++ }
+
+ return Success;
+ }
+@@ -1071,7 +1088,11 @@ DRI2WaitSwap(ClientPtr client, DrawableP
+ return FALSE;
+ }
+
+-
++Bool
++DRI2NeedAsyncSwapBuffersReply(DrawablePtr pDraw)
++{
++ return DRI2GetScreen(pDraw->pScreen)->need_async_swapbuffers_reply;
++}
+
+ int
+ DRI2SwapBuffers(ClientPtr client, DrawablePtr pDraw, CARD64 target_msc,
+@@ -1516,6 +1537,12 @@ DRI2ScreenInit(ScreenPtr pScreen, DRI2In
+ if (!ds->driverNames)
+ goto err_out;
+ ds->driverNames[0] = info->driverName;
++
++ /* Mali incorrectly assumes that a return from SwapBuffers means that
++ * buffers have been swapped, it doesn't wait for the relevant event.
++ * In this case, we work around this annoyance by delaying the
++ * SwapBuffers reply until the swap has actually completed. */
++ ds->need_async_swapbuffers_reply = strcmp(info->driverName, "armsoc") == 0;
+ }
+ else {
+ ds->numDrivers = info->numDrivers;
+--- a/hw/xfree86/dri2/dri2ext.c
++++ b/hw/xfree86/dri2/dri2ext.c
+@@ -53,6 +53,14 @@
+
+ static int DRI2EventBase;
+
++/* Data shared by ProcDRI2SwapBuffers and the asynchronous callback
++ * DRI2SwapEvent */
++struct SwapBuffersData {
++ CARD64 swap_target;
++ int refcnt;
++ DrawablePtr pDrawable;
++ CARD16 sequence;
++};
+
+ static Bool
+ validDrawable(ClientPtr client, XID drawable, Mask access_mode,
+@@ -276,7 +284,6 @@ ProcDRI2GetBuffers(ClientPtr client)
+
+ if (DRI2ThrottleClient(client, pDrawable))
+ return Success;
+-
+ attachments = (unsigned int *) &stuff[1];
+ buffers = DRI2GetBuffers(pDrawable, &width, &height,
+ attachments, stuff->count, &count);
+@@ -355,6 +362,24 @@ load_swap_reply(xDRI2SwapBuffersReply *
+ rep->swap_lo = sbc & 0xffffffff;
+ }
+
++static void swap_buffers_data_unref(struct SwapBuffersData *data)
++{
++ if (--data->refcnt == 0)
++ free(data);
++}
++
++static void send_swap_buffers_reply(ClientPtr client, struct SwapBuffersData *data)
++{
++ xDRI2SwapBuffersReply rep = {
++ .type = X_Reply,
++ .length = 0,
++ .sequenceNumber = data->sequence,
++ };
++
++ load_swap_reply(&rep, data->swap_target);
++ WriteToClient(client, sizeof(xDRI2SwapBuffersReply), &rep);
++}
++
+ static CARD64
+ vals_to_card64(CARD32 lo, CARD32 hi)
+ {
+@@ -365,11 +390,11 @@ static void
+ DRI2SwapEvent(ClientPtr client, void *data, int type, CARD64 ust, CARD64 msc,
+ CARD32 sbc)
+ {
+- DrawablePtr pDrawable = data;
++ struct SwapBuffersData *sbdata = data;
+ xDRI2BufferSwapComplete2 event = {
+ .type = DRI2EventBase + DRI2_BufferSwapComplete,
+ .event_type = type,
+- .drawable = pDrawable->id,
++ .drawable = sbdata->pDrawable->id,
+ .ust_hi = (CARD64) ust >> 32,
+ .ust_lo = ust & 0xffffffff,
+ .msc_hi = (CARD64) msc >> 32,
+@@ -377,6 +402,12 @@ DRI2SwapEvent(ClientPtr client, void *da
+ .sbc = sbc
+ };
+
++ if (DRI2NeedAsyncSwapBuffersReply(sbdata->pDrawable)) {
++ send_swap_buffers_reply(client, sbdata);
++ AttendClient(client);
++ }
++
++ swap_buffers_data_unref(data);
+ WriteEventsToClient(client, 1, (xEvent *) &event);
+ }
+
+@@ -384,14 +415,11 @@ static int
+ ProcDRI2SwapBuffers(ClientPtr client)
+ {
+ REQUEST(xDRI2SwapBuffersReq);
+- xDRI2SwapBuffersReply rep = {
+- .type = X_Reply,
+- .sequenceNumber = client->sequence,
+- .length = 0
+- };
+ DrawablePtr pDrawable;
+- CARD64 target_msc, divisor, remainder, swap_target;
++ CARD64 target_msc, divisor, remainder;
++ struct SwapBuffersData *data;
+ int status;
++ Bool need_async_swap_reply;
+
+ REQUEST_SIZE_MATCH(xDRI2SwapBuffersReq);
+
+@@ -399,6 +427,8 @@ ProcDRI2SwapBuffers(ClientPtr client)
+ DixReadAccess | DixWriteAccess, &pDrawable, &status))
+ return status;
+
++ need_async_swap_reply = DRI2NeedAsyncSwapBuffersReply(pDrawable);
++
+ /*
+ * Ensures an out of control client can't exhaust our swap queue, and
+ * also orders swaps.
+@@ -406,18 +436,49 @@ ProcDRI2SwapBuffers(ClientPtr client)
+ if (DRI2ThrottleClient(client, pDrawable))
+ return Success;
+
++ data = malloc(sizeof(struct SwapBuffersData));
++ /* DRI2SwapEvent() above might feel like the obvious place to free the
++ * data struct we just allocated. But consider the following sequence of
++ * events (and understand that swap_target is stored in data):
++ * 1. DRI2SwapBuffers accesses swap_target
++ * 2. DRI2SwapBuffers calls into ScheduleSwap
++ * 3. ScheduleSwap blits and calls DRI2SwapEvent before returning
++ * 4. Back in DRI2SwapBuffers, we access swap_target again
++ *
++ * If we had freed data in step 3, we would have a use-after-free in
++ * step 4.
++ *
++ * So, we set up a refcount of 2. One ref is owned by this function, and
++ * one ref is owned by DRI2SwapEvent. We only free the data once both
++ * functions are done. */
++ data->refcnt = 2;
++ data->pDrawable = pDrawable;
++ data->sequence = client->sequence;
++
+ target_msc = vals_to_card64(stuff->target_msc_lo, stuff->target_msc_hi);
+ divisor = vals_to_card64(stuff->divisor_lo, stuff->divisor_hi);
+ remainder = vals_to_card64(stuff->remainder_lo, stuff->remainder_hi);
+
++ /* If we're sending the reply later, we need to avoid processing any
++ * more requests from this client until that point. */
++ if (need_async_swap_reply)
++ IgnoreClient(client);
++
+ status = DRI2SwapBuffers(client, pDrawable, target_msc, divisor, remainder,
+- &swap_target, DRI2SwapEvent, pDrawable);
+- if (status != Success)
++ &data->swap_target, DRI2SwapEvent, data);
++
++ if (status != Success) {
++ swap_buffers_data_unref(data);
++ swap_buffers_data_unref(data);
++ if (need_async_swap_reply)
++ AttendClient(client);
+ return BadDrawable;
++ }
+
+- load_swap_reply(&rep, swap_target);
++ if (!need_async_swap_reply)
++ send_swap_buffers_reply(client, data);
+
+- WriteToClient(client, sizeof(xDRI2SwapBuffersReply), &rep);
++ swap_buffers_data_unref(data);
+
+ return Success;
+ }
+--- a/hw/xfree86/dri2/dri2.h
++++ b/hw/xfree86/dri2/dri2.h
+@@ -359,4 +359,6 @@ extern _X_EXPORT int DRI2GetParam(Client
+ CARD64 *value);
+
+ extern _X_EXPORT DrawablePtr DRI2UpdatePrime(DrawablePtr pDraw, DRI2BufferPtr pDest);
++
++extern Bool DRI2NeedAsyncSwapBuffersReply(DrawablePtr pDraw);
+ #endif