[OpenWrt-Devel] [PATCH] ar71xx: check for stuck DMA on AR724x & fix sirq storm after recovery
Felix Fietkau
nbd at openwrt.org
Fri Jan 8 11:49:38 EST 2016
On 2016-01-08 06:26, Conn O'Griofa wrote:
> Hi,
>
> I'm proposing the following patch to resolve ticket #18922 fully.
>
> With the current master revision, when a tx timeout condition occurs,
> the interface recovers successfully, but a soft irq storm occurs
> (causing ksoftirqd to peg the CPU, due to this goto being called
> without end:
> https://github.com/openwrt-mirror/openwrt/blob/master/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c#L1073
> ). Forcing the tx and rx rings to be cleared and re-inited in
> ag71xx_restart_work_func seems to avoid the sirq storm, but I'd
> appreciate feedback on whether there's a more effective workaround.
>
> Additionally, ag71xx_check_dma_stuck *does* successfully detect the
> stuck DMA condition on AR7241 (TR-WL842ND v1), so enabling the check
> for this chipset series ensures a link adjust occurs *before* an
> actual tx timeout is detected. This avoids the brief network
> interruption that normally occurs during the DMA stuck -> tx timeout
> -> link adjust condition.
>
> Conn
>
> P.S. The sirq storm also occurs when ag71xx_check_dma_stuck is utilized on this chipset to avoid the tx timeout condition, so it appears that both changes are necessary (or at least, a better way to solve the sirq storm needs to be discovered).
Thanks for investigating this further. Please try this patch:
---
--- a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
+++ b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
@@ -631,34 +631,61 @@ void ag71xx_link_adjust(struct ag71xx *ag)
ag71xx_rr(ag, AG71XX_REG_MAC_IFCTL));
}
+static int ag71xx_hw_enable(struct ag71xx *ag)
+{
+ int ret;
+
+ ret = ag71xx_rings_init(ag);
+ if (ret)
+ return ret;
+
+ napi_enable(&ag->napi);
+ ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
+ ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma);
+ netif_start_queue(ag->dev);
+
+ return 0;
+}
+
+static void ag71xx_hw_disable(struct ag71xx *ag)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ag->lock, flags);
+
+ netif_stop_queue(ag->dev);
+
+ ag71xx_hw_stop(ag);
+ ag71xx_dma_reset(ag);
+
+ napi_disable(&ag->napi);
+ del_timer_sync(&ag->oom_timer);
+
+ spin_unlock_irqrestore(&ag->lock, flags);
+
+ ag71xx_rings_cleanup(ag);
+}
+
static int ag71xx_open(struct net_device *dev)
{
struct ag71xx *ag = netdev_priv(dev);
unsigned int max_frame_len;
int ret;
+ netif_carrier_off(dev);
max_frame_len = ag71xx_max_frame_len(dev->mtu);
ag->rx_buf_size = max_frame_len + NET_SKB_PAD + NET_IP_ALIGN;
/* setup max frame length */
ag71xx_wr(ag, AG71XX_REG_MAC_MFL, max_frame_len);
+ ag71xx_hw_set_macaddr(ag, dev->dev_addr);
- ret = ag71xx_rings_init(ag);
+ ret = ag71xx_hw_enable(ag);
if (ret)
goto err;
- napi_enable(&ag->napi);
-
- netif_carrier_off(dev);
ag71xx_phy_start(ag);
- ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
- ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma);
-
- ag71xx_hw_set_macaddr(ag, dev->dev_addr);
-
- netif_start_queue(dev);
-
return 0;
err:
@@ -669,24 +696,10 @@ err:
static int ag71xx_stop(struct net_device *dev)
{
struct ag71xx *ag = netdev_priv(dev);
- unsigned long flags;
netif_carrier_off(dev);
ag71xx_phy_stop(ag);
-
- spin_lock_irqsave(&ag->lock, flags);
-
- netif_stop_queue(dev);
-
- ag71xx_hw_stop(ag);
- ag71xx_dma_reset(ag);
-
- napi_disable(&ag->napi);
- del_timer_sync(&ag->oom_timer);
-
- spin_unlock_irqrestore(&ag->lock, flags);
-
- ag71xx_rings_cleanup(ag);
+ ag71xx_hw_disable(ag);
return 0;
}
@@ -870,14 +883,10 @@ static void ag71xx_restart_work_func(struct work_struct *work)
{
struct ag71xx *ag = container_of(work, struct ag71xx, restart_work);
- if (ag71xx_get_pdata(ag)->is_ar724x) {
- ag->link = 0;
- ag71xx_link_adjust(ag);
- return;
- }
-
- ag71xx_stop(ag->dev);
- ag71xx_open(ag->dev);
+ rtnl_lock();
+ ag71xx_hw_disable(ag);
+ ag71xx_hw_enable(ag);
+ rtnl_unlock();
}
static bool ag71xx_check_dma_stuck(struct ag71xx *ag, unsigned long timestamp)
@@ -919,7 +928,7 @@ static int ag71xx_tx_packets(struct ag71xx *ag, bool flush)
struct sk_buff *skb = ring->buf[i].skb;
if (!flush && !ag71xx_desc_empty(desc)) {
- if (pdata->is_ar7240 &&
+ if (pdata->is_ar724x &&
ag71xx_check_dma_stuck(ag, ring->buf[i].timestamp))
schedule_work(&ag->restart_work);
break;
_______________________________________________
openwrt-devel mailing list
openwrt-devel at lists.openwrt.org
https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel
More information about the openwrt-devel
mailing list