
#include <linux/linkage.h>
#include "pm.h"

#define CACHE_LINE_SHIFT  6
#define CACHE_LINE_SIZE   (1 << CACHE_LINE_SHIFT)

#define lowpower_control_reg		(20*4)
#define lowpower_control_msk		(0x1f << 8)
#define lowpower_control_val_slfrfrsh	(0x02 << 8)

#define phy_pad_ctrl_reg_0_reg		(0x81*4)
#define phy_pad_ctrl_reg_0_pad_type	(1 << 8)

#define CMU_PLL3			0x08
#define CMU_PLL3_PD			(1 << 15)
#define CMU_PLL3_LD			(1 << 18)

	.text

/*
 * dmw_sleep_selfrefresh(u32 mode, u32 denali_base, u32 cmu_base)
 *
 * Puts the RAM into self refresh, waits for a wake-up event (wfi) and takes
 * the RAM out of self refresh again before returning.
 *
 * In:
 *   r0 = mode        - SLEEP_QUIRK_DIS_INP and/or SLEEP_DDR_PLL_OFF flags
 *   r1 = denali_base - base address of the DDR controller registers
 *   r2 = cmu_base    - base address of the CMU registers
 *
 * Register usage:
 *   r3 = control register (CP15 c1) value on entry, restored on exit
 *   r4 = prefetch start address, later the lowpower_control value
 *   r5 = prefetch end/length, later the phy_pad_ctrl_reg_0 value
 *   r6 = scratch (zero for the PLE operations, later the CMU_PLL3 value)
 *   r7 = PLE control word / PLE channel status
 *
 * Clobbers: r3 and the condition flags. r4-r7 are saved on the stack and
 * restored before returning.
 *
 * The whole function runs out of ICACHE and TLB because no memory access is
 * possible after the RAM is in self refresh mode.
 *
 * Must be called with IRQs and FIQs disabled!
 */
	.align CACHE_LINE_SHIFT
ENTRY(dmw_sleep_selfrefresh)
	push {r4, r5, r6, r7}

	adr	r4, __start_of_prefetch
	adr	r5, __end_of_prefetch
	bic	r4, r4, #(CACHE_LINE_SIZE-1)	@ round start down to a cache line

	/*
	 * Prefetch ITLB. The kernel uses 1M page tables for its code so it is
	 * enough to prefetch start and end of function.
	 */
	mcr	p15, 0, r4, c10, c1, 1		@ Prefetch I-TLB
	mcr	p15, 0, r5, c10, c1, 1		@ Prefetch I-TLB

	/*
	 * Prefetch DTLB of peripherals
	 */
	mcr     p15, 0, r1, c10, c1, 0          @ Prefetch D-TLB of DDR controller
	mcr     p15, 0, r2, c10, c1, 0          @ Prefetch D-TLB of CMU

	/*
	 * Disable branch prediction. Otherwise the CPU will fetch instructions
	 * where it _thinks_ it may reach them. Doesn't play nice when the DRAM
	 * is not accessible...
	 *
	 * The unmodified control register value stays in r3 so that the exit
	 * path can put back exactly what the caller had, instead of forcing
	 * branch prediction on.
	 */
	mrc	p15, 0, r3, c1, c0, 0		@ r3 = control register on entry
	bic	r6, r3, #(1 << 11)
	mcr	p15, 0, r6, c1, c0, 0

	/*
	 * Prefetch the whole function into L2CACHE. We're using the L2 preload
	 * engine to first clean the involved cache lines before filling them.
	 * Note that doing a simple loop to "ldr" a word from every cache line
	 * is dangerous because it might evict dirty cache lines which the CPU
	 * writes back asynchronously!
	 */
	sub	r5, r5, r4			@ calculate number of cache lines
	bic	r5, r5, #(CACHE_LINE_SIZE-1)

	mov	r6, #0
	mov	r7, #(1 << 30)

	@ clean and invalidate the involved cache lines

	mcr	p15, 0, r6, c11, c3, 2		@ Clear PLE
	mcr	p15, 0, r4, c11, c5, 0		@ start address
	mcr	p15, 0, r5, c11, c7, 0		@ end address
	mcr	p15, 0, r7, c11, c4, 0		@ transfer L2 -> DRAM
	mcr	p15, 0, r6, c11, c3, 1		@ start
1:
	mrc	p15, 0, r7, c11, c8, 0		@ poll PLE channel status...
	teq	r7, #3
	bne	1b				@ ...until it reads 3

	@ now fill the cache lines

	mcr	p15, 0, r6, c11, c3, 2		@ Clear PLE
	mcr	p15, 0, r4, c11, c5, 0		@ start address
	mcr	p15, 0, r5, c11, c7, 0		@ end address
	mcr	p15, 0, r6, c11, c4, 0		@ transfer DRAM -> L2
	mcr	p15, 0, r6, c11, c3, 1		@ start
1:
	mrc	p15, 0, r7, c11, c8, 0		@ poll PLE channel status...
	teq	r7, #3
	bne	1b				@ ...until it reads 3

	/*
	 * Put the DDR controller in self refresh.
	 *
	 * SLEEP_QUIRK_DIS_INP: switch pads to DDR1. Otherwise the DDR pads
	 * will consume too much power!
	 *
	 * The flags set by the tst below select the conditional (ne)
	 * instructions that follow; nothing in between updates them.
	 */
	tst	r0, #SLEEP_QUIRK_DIS_INP
	ldr	r4, [r1, #lowpower_control_reg]
	ldr	r5, [r1, #phy_pad_ctrl_reg_0_reg]

	orr	r4, r4, #lowpower_control_val_slfrfrsh
	bicne	r5, r5, #phy_pad_ctrl_reg_0_pad_type
	str	r4, [r1, #lowpower_control_reg]	@ request self refresh
	/*
	 * Everything between __start_of_prefetch and __end_of_prefetch has
	 * been preloaded above and is executed after self refresh was
	 * requested.
	 */
__start_of_prefetch:
	dsb
	strne	r5, [r1, #phy_pad_ctrl_reg_0_reg]
	bic	r4, r4, #lowpower_control_msk	@ r4 = value to write on wake-up
	orrne	r5, r5, #phy_pad_ctrl_reg_0_pad_type @ r5 = value to write on wake-up

	/*
	 * Switch off PLL3 if requested.
	 */
	tst	r0, #SLEEP_DDR_PLL_OFF
	beq	keep_pll

	ldr	r6, [r2, #CMU_PLL3]
	orr	r6, r6, #CMU_PLL3_PD
	str	r6, [r2, #CMU_PLL3]

keep_pll:
	/*
	 * Zzzzzzz
	 */
	dsb
	wfi

	/*
	 * Restore PLL3.
	 *
	 * The flags still hold the result of the SLEEP_DDR_PLL_OFF test: none
	 * of the instructions executed since then update them.
	 */
	beq	no_restore

	bic	r6, r6, #CMU_PLL3_PD
	str     r6, [r2, #CMU_PLL3]
	dsb
1:
	ldr	r6, [r2, #CMU_PLL3]
	tst	r6, #CMU_PLL3_LD
	beq	1b				@ spin until CMU_PLL3_LD is set

no_restore:
	/*
	 * Switch back to DDR2 and exit self refresh mode.
	 */
	dsb
	str	r5, [r1, #phy_pad_ctrl_reg_0_reg]
	dsb
	str	r4, [r1, #lowpower_control_reg]
	dsb

__end_of_prefetch:
	/*
	 * Fill a cache line (16 instructions).
	 */
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop

	/*
	 * Restore the control register value saved in r3 on entry. This
	 * enables branch prediction again only if it was enabled when we
	 * were called.
	 */
	mcr	p15, 0, r3, c1, c0, 0

	pop	{r4, r5, r6, r7}
	mov	pc, lr
ENDPROC(dmw_sleep_selfrefresh)

