Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DWC2 DMA support #2576

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/class/audio/audio_device.c
Expand Up @@ -65,8 +65,10 @@
//--------------------------------------------------------------------+

// Use ring buffer if it's available, some MCUs need extra RAM requirements
// For DWC2 enable ring buffer will disable DMA (if available)
#ifndef TUD_AUDIO_PREFER_RING_BUFFER
#if CFG_TUSB_MCU == OPT_MCU_LPC43XX || CFG_TUSB_MCU == OPT_MCU_LPC18XX || CFG_TUSB_MCU == OPT_MCU_MIMXRT1XXX
#if CFG_TUSB_MCU == OPT_MCU_LPC43XX || CFG_TUSB_MCU == OPT_MCU_LPC18XX || CFG_TUSB_MCU == OPT_MCU_MIMXRT1XXX || \
defined(TUP_USBIP_DWC2)
#define TUD_AUDIO_PREFER_RING_BUFFER 0
#else
#define TUD_AUDIO_PREFER_RING_BUFFER 1
Expand Down
186 changes: 144 additions & 42 deletions src/portable/synopsys/dwc2/dcd_dwc2.c
Expand Up @@ -67,18 +67,6 @@
// Debug level for DWC2
#define DWC2_DEBUG 2

#ifndef dcache_clean
#define dcache_clean(_addr, _size)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dcache code should be removed and add MPU suggestion to make usb ram non cacheable:

  • Not all internal buffers are aligned to cache line size and has size of multiple cache line size, speculative read or cache eviction of adjacent variables could lead to racing with DMA.
  • Frequent cache clean invalidate hurt performance
  • In most cases Cortex-M7 recommend use DTCM where cache is not used

#endif

#ifndef dcache_invalidate
#define dcache_invalidate(_addr, _size)
#endif

#ifndef dcache_clean_invalidate
#define dcache_clean_invalidate(_addr, _size)
#endif

static TU_ATTR_ALIGNED(4) uint32_t _setup_packet[2];

typedef struct {
Expand All @@ -98,6 +86,9 @@ static uint16_t ep0_pending[2]; // Index determines direction as t
// TX FIFO RAM allocation so far in words - RX FIFO size is readily available from dwc2->grxfsiz
static uint16_t _allocated_fifo_words_tx; // TX FIFO size in words (IN EPs)

// Number of IN endpoints active
static uint8_t _allocated_ep_in_count;

// SOF enabling flag - required for SOF to not get disabled in ISR when SOF was enabled by
static bool _sof_en;

Expand Down Expand Up @@ -165,6 +156,12 @@ static bool fifo_alloc(uint8_t rhport, uint8_t ep_addr, uint16_t packet_size) {
dwc2->grxfsiz = sz;
}
} else {
// Check IN endpoints concurrently active limit
if(_dwc2_controller->ep_in_count) {
TU_ASSERT(_allocated_ep_in_count < _dwc2_controller->ep_in_count);
_allocated_ep_in_count++;
}

// Note if The TXFELVL is configured as half empty. In order
// to be able to write a packet at that point, the fifo must be twice the max_size.
if ((dwc2->gahbcfg & GAHBCFG_TXFELVL) == 0) {
Expand All @@ -186,6 +183,38 @@ static bool fifo_alloc(uint8_t rhport, uint8_t ep_addr, uint16_t packet_size) {
return true;
}

static inline bool dma_enabled(uint8_t rhport)
{
// DMA doesn't support fifo transfer
#ifdef TUD_AUDIO_PREFER_RING_BUFFER
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't want to depend on class driver but maybe it's the simplest way.

#if TUD_AUDIO_PREFER_RING_BUFFER
return false;
#endif
#endif
dwc2_regs_t* dwc2 = DWC2_REG(rhport);
// Internal DMA only
return (dwc2->ghwcfg2_bm.arch == 2);
}

static void dma_stpkt_rx(uint8_t rhport)
{
dwc2_regs_t* dwc2 = DWC2_REG(rhport);

if (dwc2->gsnpsid >= DWC2_CORE_REV_3_00a) {
if(dwc2->epout[0].doepctl & DOEPCTL_EPENA)
return;
}

// Receive only 1 packet
dwc2->epout[0].doeptsiz = 0;
dwc2->epout[0].doeptsiz |= (1 << DOEPTSIZ_STUPCNT_Pos);
dwc2->epout[0].doeptsiz |= 8;
dwc2->epout[0].doeptsiz |= (1 << DOEPTSIZ_PKTCNT_Pos);
dwc2->epout[0].doepdma = (uintptr_t)_setup_packet;

dwc2->epout[0].doepctl |= DOEPCTL_EPENA | DOEPCTL_USBAEP;
}

static void edpt_activate(uint8_t rhport, tusb_desc_endpoint_t const * p_endpoint_desc) {
dwc2_regs_t* dwc2 = DWC2_REG(rhport);
uint8_t const epnum = tu_edpt_number(p_endpoint_desc->bEndpointAddress);
Expand Down Expand Up @@ -272,6 +301,8 @@ static void bus_reset(uint8_t rhport) {

_sof_en = false;

_allocated_ep_in_count = 1;

// clear device address
dwc2->dcfg &= ~DCFG_DAD_Msk;

Expand Down Expand Up @@ -351,6 +382,12 @@ static void bus_reset(uint8_t rhport) {
// Setup the control endpoint 0
_allocated_fifo_words_tx = 16;

// DMA needs extra space for processing
if(dma_enabled(rhport)) {
uint16_t reserved = _dwc2_controller[rhport].ep_fifo_size / 4- dwc2->ghwcfg3_bm.total_fifo_size;
Copy link
Collaborator Author

@HiFiPhile HiFiPhile Apr 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kernel use dwc2->ghwcfg3_bm.total_fifo_size it should be reliable.
We can even remove _dwc2_controller[rhport].ep_fifo_size and use this field instead, except GD32VF103 looks strange.

_allocated_fifo_words_tx += reserved;
}

// Control IN uses FIFO 0 with 64 bytes ( 16 32-bit word )
dwc2->dieptxf0 = (16 << DIEPTXF0_TX0FD_Pos) | (_dwc2_controller[rhport].ep_fifo_size / 4 - _allocated_fifo_words_tx);

Expand All @@ -361,6 +398,10 @@ static void bus_reset(uint8_t rhport) {

dwc2->epout[0].doeptsiz |= (3 << DOEPTSIZ_STUPCNT_Pos);

if(dma_enabled(rhport)) {
dma_stpkt_rx(rhport);
}

dwc2->gintmsk |= GINTMSK_OEPINT | GINTMSK_IEPINT;
}

Expand All @@ -369,11 +410,11 @@ static void edpt_schedule_packets(uint8_t rhport, uint8_t const epnum, uint8_t c
(void) rhport;

dwc2_regs_t* dwc2 = DWC2_REG(rhport);
xfer_ctl_t* const xfer = XFER_CTL_BASE(epnum, dir);

// EP0 is limited to one packet each xfer
// We use multiple transaction of xfer->max_size length to get a whole transfer done
if (epnum == 0) {
xfer_ctl_t* const xfer = XFER_CTL_BASE(epnum, dir);
total_bytes = tu_min16(ep0_pending[dir], xfer->max_size);
ep0_pending[dir] -= total_bytes;
}
Expand All @@ -386,17 +427,31 @@ static void edpt_schedule_packets(uint8_t rhport, uint8_t const epnum, uint8_t c
epin[epnum].dieptsiz = (num_packets << DIEPTSIZ_PKTCNT_Pos) |
((total_bytes << DIEPTSIZ_XFRSIZ_Pos) & DIEPTSIZ_XFRSIZ_Msk);

epin[epnum].diepctl |= DIEPCTL_EPENA | DIEPCTL_CNAK;
if(dma_enabled(rhport)) {
epin[epnum].diepdma = (uintptr_t)xfer->buffer;

// For ISO endpoint set correct odd/even bit for next frame.
if ((epin[epnum].diepctl & DIEPCTL_EPTYP) == DIEPCTL_EPTYP_0 && (XFER_CTL_BASE(epnum, dir))->interval == 1) {
// Take odd/even bit from frame counter.
uint32_t const odd_frame_now = (dwc2->dsts & (1u << DSTS_FNSOF_Pos));
epin[epnum].diepctl |= (odd_frame_now ? DIEPCTL_SD0PID_SEVNFRM_Msk : DIEPCTL_SODDFRM_Msk);
}
// Enable fifo empty interrupt only if there are something to put in the fifo.
if (total_bytes != 0) {
dwc2->diepempmsk |= (1 << epnum);
// For ISO endpoint set correct odd/even bit for next frame.
if ((epin[epnum].diepctl & DIEPCTL_EPTYP) == DIEPCTL_EPTYP_0 && (XFER_CTL_BASE(epnum, dir))->interval == 1) {
// Take odd/even bit from frame counter.
uint32_t const odd_frame_now = (dwc2->dsts & (1u << DSTS_FNSOF_Pos));
epin[epnum].diepctl |= (odd_frame_now ? DIEPCTL_SD0PID_SEVNFRM_Msk : DIEPCTL_SODDFRM_Msk);
}

epin[epnum].diepctl |= DIEPCTL_EPENA | DIEPCTL_CNAK;
} else {

epin[epnum].diepctl |= DIEPCTL_EPENA | DIEPCTL_CNAK;

// For ISO endpoint set correct odd/even bit for next frame.
if ((epin[epnum].diepctl & DIEPCTL_EPTYP) == DIEPCTL_EPTYP_0 && (XFER_CTL_BASE(epnum, dir))->interval == 1) {
// Take odd/even bit from frame counter.
uint32_t const odd_frame_now = (dwc2->dsts & (1u << DSTS_FNSOF_Pos));
epin[epnum].diepctl |= (odd_frame_now ? DIEPCTL_SD0PID_SEVNFRM_Msk : DIEPCTL_SODDFRM_Msk);
}
// Enable fifo empty interrupt only if there are something to put in the fifo.
if (total_bytes != 0) {
dwc2->diepempmsk |= (1 << epnum);
}
}
} else {
dwc2_epout_t* epout = dwc2->epout;
Expand All @@ -406,13 +461,18 @@ static void edpt_schedule_packets(uint8_t rhport, uint8_t const epnum, uint8_t c
epout[epnum].doeptsiz |= (num_packets << DOEPTSIZ_PKTCNT_Pos) |
((total_bytes << DOEPTSIZ_XFRSIZ_Pos) & DOEPTSIZ_XFRSIZ_Msk);

epout[epnum].doepctl |= DOEPCTL_EPENA | DOEPCTL_CNAK;
if ((epout[epnum].doepctl & DOEPCTL_EPTYP) == DOEPCTL_EPTYP_0 &&
XFER_CTL_BASE(epnum, dir)->interval == 1) {
// Take odd/even bit from frame counter.
uint32_t const odd_frame_now = (dwc2->dsts & (1u << DSTS_FNSOF_Pos));
epout[epnum].doepctl |= (odd_frame_now ? DOEPCTL_SD0PID_SEVNFRM_Msk : DOEPCTL_SODDFRM_Msk);
}

if(dma_enabled(rhport)) {
epout[epnum].doepdma = (uintptr_t)xfer->buffer;
}

epout[epnum].doepctl |= DOEPCTL_EPENA | DOEPCTL_CNAK;
}
}

Expand Down Expand Up @@ -619,6 +679,11 @@ void dcd_init(uint8_t rhport) {
// Enable global interrupt
dwc2->gahbcfg |= GAHBCFG_GINT;

if (dma_enabled(rhport)) {
dwc2->gahbcfg |= GAHBCFG_DMAEN | GAHBCFG_HBSTLEN_2;
dwc2->gintmsk &=~GINTMSK_RXFLVLM;
}

// make sure we are in device mode
// TU_ASSERT(!(dwc2->gintsts & GINTSTS_CMOD), );

Expand Down Expand Up @@ -706,6 +771,8 @@ void dcd_edpt_close_all(uint8_t rhport) {
dwc2_regs_t* dwc2 = DWC2_REG(rhport);
uint8_t const ep_count = _dwc2_controller[rhport].ep_count;

_allocated_ep_in_count = 1;

// Disable non-control interrupt
dwc2->daintmsk = (1 << DAINTMSK_OEPM_Pos) | (1 << DAINTMSK_IEPM_Pos);

Expand All @@ -730,6 +797,12 @@ void dcd_edpt_close_all(uint8_t rhport) {

fifo_flush_tx(dwc2, 0x10); // all tx fifo
fifo_flush_rx(dwc2);

// DMA needs extra space for processing
if(dma_enabled(rhport)) {
uint16_t reserved = _dwc2_controller[rhport].ep_fifo_size / 4- dwc2->ghwcfg3_bm.total_fifo_size;
_allocated_fifo_words_tx += reserved;
}
}

bool dcd_edpt_iso_alloc(uint8_t rhport, uint8_t ep_addr, uint16_t largest_packet_size) {
Expand Down Expand Up @@ -809,6 +882,9 @@ void dcd_edpt_close(uint8_t rhport, uint8_t ep_addr) {

void dcd_edpt_stall(uint8_t rhport, uint8_t ep_addr) {
edpt_disable(rhport, ep_addr, true);
if((tu_edpt_number(ep_addr) == 0) && dma_enabled(rhport)) {
dma_stpkt_rx(rhport);
}
}

void dcd_edpt_clear_stall(uint8_t rhport, uint8_t ep_addr) {
Expand Down Expand Up @@ -991,33 +1067,56 @@ static void handle_epout_irq(uint8_t rhport) {

uint32_t const doepint = epout->doepint;

// SETUP packet Setup Phase done.
if (doepint & DOEPINT_STUP) {
uint32_t clear_flag = DOEPINT_STUP;

// STPKTRX is only available for version from 3_00a
if ((doepint & DOEPINT_STPKTRX) && (dwc2->gsnpsid >= DWC2_CORE_REV_3_00a)) {
clear_flag |= DOEPINT_STPKTRX;
}

epout->doepint = clear_flag;
dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true);
}

// OUT XFER complete
if (epout->doepint & DOEPINT_XFRC) {
epout->doepint = DOEPINT_XFRC;

xfer_ctl_t* xfer = XFER_CTL_BASE(n, TUSB_DIR_OUT);

// EP0 can only handle one packet
if ((n == 0) && ep0_pending[TUSB_DIR_OUT]) {
// Schedule another packet to be received.
edpt_schedule_packets(rhport, n, TUSB_DIR_OUT, 1, ep0_pending[TUSB_DIR_OUT]);
if (doepint & DOEPINT_STUP) {
// STPKTRX is only available for version from 3_00a
if ((doepint & DOEPINT_STPKTRX) && (dwc2->gsnpsid >= DWC2_CORE_REV_3_00a)) {
epout->doepint = DOEPINT_STPKTRX;
}
} else if (doepint & DOEPINT_OTEPSPR) {
epout->doepint = DOEPINT_OTEPSPR;
} else {
dcd_event_xfer_complete(rhport, n, xfer->total_len, XFER_RESULT_SUCCESS, true);
if ((doepint & DOEPINT_STPKTRX) && (dwc2->gsnpsid >= DWC2_CORE_REV_3_00a)) {
epout->doepint = DOEPINT_STPKTRX;
} else {
// EP0 can only handle one packet
if ((n == 0) && ep0_pending[TUSB_DIR_OUT]) {
// Schedule another packet to be received.
edpt_schedule_packets(rhport, n, TUSB_DIR_OUT, 1, ep0_pending[TUSB_DIR_OUT]);
} else {
if(dma_enabled(rhport)) {
// Fix packet length
uint16_t remain = (epout->doeptsiz & DOEPTSIZ_XFRSIZ_Msk) >> DOEPTSIZ_XFRSIZ_Pos;
xfer->total_len -= remain;
// this is ZLP, so prepare EP0 for next setup
if(n == 0 && xfer->total_len == 0) {
dma_stpkt_rx(rhport);
}
}

dcd_event_xfer_complete(rhport, n, xfer->total_len, XFER_RESULT_SUCCESS, true);
}
}
}
}

// SETUP packet Setup Phase done.
if (doepint & DOEPINT_STUP) {
epout->doepint = DOEPINT_STUP;
if ((doepint & DOEPINT_STPKTRX) && (dwc2->gsnpsid >= DWC2_CORE_REV_3_00a)) {
epout->doepint = DOEPINT_STPKTRX;
}
if(dma_enabled(rhport) && (dwc2->gsnpsid >= DWC2_CORE_REV_3_00a)) {
dma_stpkt_rx(rhport);
}

dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true);
}
}
}
}
Expand All @@ -1042,6 +1141,9 @@ static void handle_epin_irq(uint8_t rhport) {
// Schedule another packet to be transmitted.
edpt_schedule_packets(rhport, n, TUSB_DIR_IN, 1, ep0_pending[TUSB_DIR_IN]);
} else {
if((n == 0) && dma_enabled(rhport)) {
dma_stpkt_rx(rhport);
}
dcd_event_xfer_complete(rhport, n | TUSB_DIR_IN_MASK, xfer->total_len, XFER_RESULT_SUCCESS, true);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/portable/synopsys/dwc2/dwc2_bcm.h
Expand Up @@ -39,7 +39,7 @@

static const dwc2_controller_t _dwc2_controller[] =
{
{ .reg_base = USB_OTG_GLOBAL_BASE, .irqnum = USB_IRQn, .ep_count = DWC2_EP_MAX, .ep_fifo_size = 4096 }
{ .reg_base = USB_OTG_GLOBAL_BASE, .irqnum = USB_IRQn, .ep_count = DWC2_EP_MAX, .ep_fifo_size = 16384 }
};

#define dcache_clean(_addr, _size) data_clean(_addr, _size)
Expand Down
5 changes: 3 additions & 2 deletions src/portable/synopsys/dwc2/dwc2_esp32.h
Expand Up @@ -37,11 +37,12 @@
//#include "soc/usb_periph.h"

#define DWC2_REG_BASE 0x60080000UL
#define DWC2_EP_MAX 6 // USB_OUT_EP_NUM. TODO ESP32Sx only has 5 tx fifo (5 endpoint IN)
#define DWC2_EP_MAX 7
#define DWC2_EP_IN_MAX 5

static const dwc2_controller_t _dwc2_controller[] =
{
{ .reg_base = DWC2_REG_BASE, .irqnum = 0, .ep_count = DWC2_EP_MAX, .ep_fifo_size = 1024 }
{ .reg_base = DWC2_REG_BASE, .irqnum = 0, .ep_count = DWC2_EP_MAX, .ep_in_count = DWC2_EP_IN_MAX, .ep_fifo_size = 1024 }
};

static intr_handle_t usb_ih;
Expand Down
7 changes: 4 additions & 3 deletions src/portable/synopsys/dwc2/dwc2_type.h
Expand Up @@ -29,6 +29,7 @@ typedef struct
uintptr_t reg_base;
uint32_t irqnum;
uint8_t ep_count;
uint8_t ep_in_count;
uint32_t ep_fifo_size;
}dwc2_controller_t;

Expand Down Expand Up @@ -213,15 +214,15 @@ union {
volatile uint32_t ghwcfg1; // 044 User Hardware Configuration1: endpoint dir (2 bit per ep)
union {
volatile uint32_t ghwcfg2; // 048 User Hardware Configuration2
dwc2_ghwcfg2_t ghwcfg2_bm;
volatile dwc2_ghwcfg2_t ghwcfg2_bm;
};
union {
volatile uint32_t ghwcfg3; // 04C User Hardware Configuration3
dwc2_ghwcfg3_t ghwcfg3_bm;
volatile dwc2_ghwcfg3_t ghwcfg3_bm;
};
union {
volatile uint32_t ghwcfg4; // 050 User Hardware Configuration4
dwc2_ghwcfg4_t ghwcfg4_bm;
volatile dwc2_ghwcfg4_t ghwcfg4_bm;
};
volatile uint32_t glpmcfg; // 054 Core LPM Configuration
volatile uint32_t gpwrdn; // 058 Power Down
Expand Down