[coreboot-gerrit] Patch set updated for coreboot: 7183f4f sandy/ivybridge: Native raminit.

Vladimir Serbinenko (phcoder@gmail.com) gerrit at coreboot.org
Tue May 27 09:12:49 CEST 2014


Vladimir Serbinenko (phcoder at gmail.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/5786

-gerrit

commit 7183f4fbe27474cccd45ec8a06106109e2f443ea
Author: Vladimir Serbinenko <phcoder at gmail.com>
Date:   Sun May 18 11:05:56 2014 +0200

    sandy/ivybridge: Native raminit.
    
    Based on damo22 work and my X230 tracing.
    
    Works for my X230 in a variety of RAM configs.
    
    Also-By: Damien Zammit <damien at zamaudio.com>
    Change-Id: I1aa024c55a8416fc53b25e7123037df0e55a2769
    Signed-off-by: Vladimir Serbinenko <phcoder at gmail.com>
---
 src/device/dram/ddr3.c                         |    8 +-
 src/include/device/dram/ddr3.h                 |   11 +-
 src/mainboard/lenovo/x230/romstage.c           |   72 +-
 src/northbridge/intel/sandybridge/Makefile.inc |    2 +
 src/northbridge/intel/sandybridge/pchinit.c    |  638 ++++
 src/northbridge/intel/sandybridge/raminit.c    | 3696 ++++++++++++++++++++++--
 src/northbridge/intel/sandybridge/raminit.h    |   16 +-
 7 files changed, 4193 insertions(+), 250 deletions(-)

diff --git a/src/device/dram/ddr3.c b/src/device/dram/ddr3.c
index 9b4f490..69782ab 100644
--- a/src/device/dram/ddr3.c
+++ b/src/device/dram/ddr3.c
@@ -110,7 +110,7 @@ int spd_decode_ddr3(dimm_attr * dimm, spd_raw_data spd)
 {
 	int ret;
 	u16 crc, spd_crc;
-	u8 ftb_divisor, ftb_dividend, capacity_shift, bus_width, sdram_width;
+	u8 ftb_divisor, ftb_dividend, capacity_shift, bus_width;
 	u8 reg8;
 	u32 mtb;		/* medium time base */
 	unsigned int val, param;
@@ -209,8 +209,8 @@ int spd_decode_ddr3(dimm_attr * dimm, spd_raw_data spd)
 		printram("  Invalid SDRAM width\n");
 		ret = SPD_STATUS_INVALID_FIELD;
 	}
-	sdram_width = (4 << val);
-	printram("  SDRAM width       : %u\n", sdram_width);
+	dimm->width = (4 << val);
+	printram("  SDRAM width       : %u\n", dimm->width);
 
 	/* Memory bus width */
 	reg8 = spd[8];
@@ -236,7 +236,7 @@ int spd_decode_ddr3(dimm_attr * dimm, spd_raw_data spd)
 	 * capacity_shift
 	 * The rest is the JEDEC formula */
 	dimm->size_mb = ((1 << (capacity_shift + (25 - 20))) * bus_width
-			 * dimm->ranks) / sdram_width;
+			 * dimm->ranks) / dimm->width;
 
 	/* Fine Timebase (FTB) Dividend/Divisor */
 	/* Dividend */
diff --git a/src/include/device/dram/ddr3.h b/src/include/device/dram/ddr3.h
index b19c51c..fcaf10b 100644
--- a/src/include/device/dram/ddr3.h
+++ b/src/include/device/dram/ddr3.h
@@ -37,6 +37,7 @@
  * @{
  */
 #define TCK_1066MHZ     240
+#define TCK_933MHZ	275
 #define TCK_800MHZ      320
 #define TCK_666MHZ      384
 #define TCK_533MHZ      480
@@ -54,11 +55,11 @@
  * disabled.
  * @{
  */
-#if defined(CONFIG_DEBUG_RAM_SETUP) && (CONFIG_DEBUG_RAM_SETUP)
+//#if defined(CONFIG_DEBUG_RAM_SETUP) && (CONFIG_DEBUG_RAM_SETUP)
 #define printram(x, ...) printk(BIOS_DEBUG, x, ##__VA_ARGS__)
-#else
-#define printram(x, ...)
-#endif
+//#else
+//#define printram(x, ...)
+//#endif
 /** @} */
 
 /*
@@ -137,6 +138,8 @@ typedef struct dimm_attr_st {
 	u16 cas_supported;
 	/* Flags extracted from SPD */
 	dimm_flags_t flags;
+	/* SDRAM width */
+	u8 width;
 	/* Number of ranks */
 	u8 ranks;
 	/* Number or row address bits */
diff --git a/src/mainboard/lenovo/x230/romstage.c b/src/mainboard/lenovo/x230/romstage.c
index 6e4e685..dba19b1 100644
--- a/src/mainboard/lenovo/x230/romstage.c
+++ b/src/mainboard/lenovo/x230/romstage.c
@@ -114,60 +114,13 @@ void main(unsigned long bist)
 	int cbmem_was_initted;
 	u32 pm1_cnt;
 	u16 pm1_sts;
+	spd_raw_data spd[4];
 
 	if (MCHBAR16(SSKPD) == 0xCAFE) {
 		outb(0x6, 0xcf9);
 		hlt ();
 	}
 
-	struct pei_data pei_data = {
-		.pei_version = PEI_VERSION,
-		.mchbar = DEFAULT_MCHBAR,
-		.dmibar = DEFAULT_DMIBAR,
-		.epbar = DEFAULT_EPBAR,
-		.pciexbar = CONFIG_MMCONF_BASE_ADDRESS,
-		.smbusbar = SMBUS_IO_BASE,
-		.wdbbar = 0x4000000,
-		.wdbsize = 0x1000,
-		.hpet_address = CONFIG_HPET_ADDRESS,
-		.rcba = DEFAULT_RCBABASE,
-		.pmbase = DEFAULT_PMBASE,
-		.gpiobase = DEFAULT_GPIOBASE,
-		.thermalbase = 0xfed08000,
-		.system_type = 0, // 0 Mobile, 1 Desktop/Server
-		.tseg_size = CONFIG_SMM_TSEG_SIZE,
-		.spd_addresses = { 0xA0, 0x00,0xA2,0x00 },
-		.ts_addresses = { 0x00, 0x00, 0x00, 0x00 },
-		.ec_present = 1,
-		.gbe_enable = 1,
-		.ddr3lv_support = 0,
-		// 0 = leave channel enabled
-		// 1 = disable dimm 0 on channel
-		// 2 = disable dimm 1 on channel
-		// 3 = disable dimm 0+1 on channel
-		.dimm_channel0_disabled = 2,
-		.dimm_channel1_disabled = 2,
-		.max_ddr3_freq = 1600,
-		.usb_port_config = {
-			 /* enabled   usb oc pin    length */
-			{ 1, 0, 0x0080 }, /* P0 (left, fan side), OC 0 */
-			{ 1, 1, 0x0080 }, /* P1 (left touchpad side), OC 1 */
-			{ 1, 3, 0x0080 }, /* P2: dock, OC 3 */
-			{ 1, 0, 0x0040 }, /* P3: wwan, no OC */
-			{ 1, 0, 0x0080 }, /* P4: Wacom tablet on X230t, otherwise empty */
-			{ 1, 0, 0x0080 }, /* P5: Expresscard, no OC */
-			{ 0, 0, 0x0000 }, /* P6: Empty */
-			{ 1, 0, 0x0080 }, /* P7: dock, no OC */
-			{ 0, 0, 0x0000 }, /* P8: Empty */
-			{ 1, 5, 0x0080 }, /* P9: Right (EHCI debug), OC 5 */
-			{ 1, 0, 0x0040 }, /* P10: fingerprint reader, no OC */
-			{ 1, 0, 0x0040 }, /* P11: bluetooth, no OC. */
-			{ 1, 0, 0x0040 }, /* P12: wlan, no OC */
-			{ 1, 0, 0x0080 }, /* P13: webcam, no OC */
-		},
-		.ddr_refresh_rate_config = 2, /* Force double refresh rate */
-	};
-
 	timestamp_init(get_initial_timestamp());
 	timestamp_add_now(TS_START_ROMSTAGE);
 
@@ -224,24 +177,13 @@ void main(unsigned long bist)
 	post_code(0x39);
 
 	post_code(0x3a);
-	pei_data.boot_mode = boot_mode;
 	timestamp_add_now(TS_BEFORE_INITRAM);
 
-	/* MRC.bin has a bug and sometimes halts (instead of reboot?).
-	 */
-	if (boot_mode != 2)
-	  {
-		  RCBA32(GCS) = RCBA32(GCS) & ~(1 << 5);	/* reset */
-		  outw((0 << 11), DEFAULT_PMBASE | 0x60 | 0x08);	/* let timer go */
-	  }
-
-	sdram_initialize(&pei_data);
+	memset (spd, 0, sizeof (spd));
+	read_spd (&spd[0], 0x50);
+	read_spd (&spd[2], 0x51);
 
-	if (boot_mode != 2)
-	  {
-		  RCBA32(GCS) = RCBA32(GCS) | (1 << 5);	/* No reset */
-		  outw((1 << 11), DEFAULT_PMBASE | 0x60 | 0x08);	/* halt timer */
-	  }
+	init_dram_ddr3 (spd, 1);
 
 	timestamp_add_now(TS_AFTER_INITRAM);
 	post_code(0x3c);
@@ -254,8 +196,8 @@ void main(unsigned long bist)
 
 	MCHBAR16(SSKPD) = 0xCAFE;
 	cbmem_was_initted = !cbmem_recovery(boot_mode==2);
-	if (boot_mode!=2)
-		save_mrc_data(&pei_data);
+//	if (boot_mode!=2)
+//		save_mrc_data(&pei_data);
 
 #if CONFIG_HAVE_ACPI_RESUME
 	/* If there is no high memory area, we didn't boot before, so
diff --git a/src/northbridge/intel/sandybridge/Makefile.inc b/src/northbridge/intel/sandybridge/Makefile.inc
index 6655e2a..707a8d0 100644
--- a/src/northbridge/intel/sandybridge/Makefile.inc
+++ b/src/northbridge/intel/sandybridge/Makefile.inc
@@ -27,6 +27,8 @@ ramstage-y += mrccache.c
 
 romstage-y += ram_calc.c
 romstage-y += raminit.c
+romstage-y += pchinit.c
+romstage-y += ../../../device/dram/ddr3.c
 romstage-y += mrccache.c
 romstage-y += early_init.c
 romstage-y += report_platform.c
diff --git a/src/northbridge/intel/sandybridge/pchinit.c b/src/northbridge/intel/sandybridge/pchinit.c
new file mode 100644
index 0000000..e03cfa8
--- /dev/null
+++ b/src/northbridge/intel/sandybridge/pchinit.c
@@ -0,0 +1,638 @@
+/*
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2014 Vladimir Serbinenko <phcoder at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <console/console.h>
+#include <string.h>
+#include <arch/hlt.h>
+#include <arch/io.h>
+#include <cbmem.h>
+#include <arch/cbfs.h>
+#include <cbfs.h>
+#include <ip_checksum.h>
+#include <pc80/mc146818rtc.h>
+#include <device/pci_def.h>
+#include "raminit.h"
+#include "pei_data.h"
+#include "sandybridge.h"
+#include <delay.h>
+
+/* Management Engine is in the southbridge */
+#include "southbridge/intel/bd82x6x/me.h"
+#include "southbridge/intel/bd82x6x/pch.h"
+#include <cpu/x86/msr.h>
+#include <cpu/cpu.h>
+#include "cpu/intel/model_2065x/model_2065x.h"
+
+#define SOUTHBRIDGE PCI_DEV(0, 0x1f, 0)
+#define NORTHBRIDGE PCI_DEV(0, 0x0, 0)
+#define GFX_DEV PCI_DEV(0, 0x2, 0)
+/* Poll bit 0 of the register at DEFAULT_RCBABASE | 0x2338 until it reads 0; returns 0 then, or -1 if it is still set after 1001 reads.  */
+static int
+wait_2338 (void)
+{
+  int timeout;
+  /* The budget is a plain read count; there is no delay between polls.  */
+  timeout = 1000;
+  while (1)
+    {
+      if (!(read8 (DEFAULT_RCBABASE | 0x2338) & 1))	/* bit 0 is presumably a busy flag -- TODO confirm */
+	return 0;
+      if (!timeout--)	/* post-decrement: gives up on the read made with timeout == 0 */
+	return -1;
+    }
+}
+/* Indirect read via RCBA 0x2330/0x2334/0x2338: writes edx to 0x2330, stores the 32-bit value read from 0x2334 in *result.  Returns 0, -1 on a wait_2338 () timeout, -2 if bits 1-2 of 0x2338 are set at the end.  */
+static int
+read_2338 (u32 edx, u32 * result)
+{
+  int ret;
+  /* edx goes to 0x2330 (presumably a target selector -- TODO confirm); then bits 9-10 of 0x2338 are set, keeping bits 0-8.  */
+  write32 (DEFAULT_RCBABASE | 0x2330, edx);
+  write16 (DEFAULT_RCBABASE | 0x2338, (read16 (DEFAULT_RCBABASE | 0x2338)
+				       & 0x1ff) | 0x600);
+  ret = wait_2338 ();
+  if (ret < 0)
+    return ret;	/* *result is left untouched on this path */
+  *result = read32 (DEFAULT_RCBABASE | 0x2334);	// !!! = 0x00590133
+  ret = wait_2338 ();
+  if (ret < 0)
+    return ret;	/* *result has already been written on this path */
+  if (read8 (DEFAULT_RCBABASE | 0x2338) & 6)	/* bits 1-2 are treated as a failure indication */
+    return -2;
+  return 0;
+}
+/* Read-modify-write through the RCBA 0x2330/0x2334/0x2338 window: reads the value selected by edx, computes (value & and) | or and writes it to 0x2334.  Returns 0, a negative code from read_2338 ()/wait_2338 (), or -2 if bits 1-2 of 0x2338 are set at the end.  */
+static int
+and_or_2338 (u32 edx, u32 and, u32 or)
+{
+  u32 t1;
+  int ret;
+  ret = read_2338 (edx, &t1);
+  if (ret < 0)
+    return ret;
+  write16 (DEFAULT_RCBABASE | 0x2338, (read16 (DEFAULT_RCBABASE | 0x2338)
+				       & 0x1ff) | 0x600);
+  t1 &= and;	/* every caller in this file passes and == 0, so the old value is dropped there */
+  t1 |= or;
+  ret = wait_2338 ();
+  if (ret < 0)
+    return ret;
+  /* Hand the merged value to the data register at 0x2334.  */
+  write32 (DEFAULT_RCBABASE | 0x2334, t1);
+  ret = wait_2338 ();
+  if (ret < 0)
+    return ret;
+  write16 (DEFAULT_RCBABASE | 0x2338,
+	   (read16 (DEFAULT_RCBABASE | 0x2338) & 0x1ff) | 0x600);
+  if (read8 (DEFAULT_RCBABASE | 0x2338) & 6)	/* NOTE(review): status is sampled right after the control write, without a wait_2338 () in between -- confirm this is intended */
+    return -2;
+  return 0;
+}
+
+#define USB_ACC_CONTROL 0x80
+/* Programs PCI devices 0:1d.0 and 0:1a.0 (presumably the two EHCI controllers -- TODO confirm) plus related RCBA/PMBASE registers with fixed values.  Reads whose result is discarded, and the "// !!! = x" notes, presumably replay and record the author's X230 trace.  */
+static void
+init_usb (void)
+{
+  u32 base;
+  pcie_read_config32 (SOUTHBRIDGE, 0xf0);	// !!! = 0xfed1c001
+  read32 (DEFAULT_RCBABASE | 0x3418);	// !!! = 0x06000000
+  pcie_read_config16 (SOUTHBRIDGE, 0x40);	// !!! = 0x0501
+  read32 (DEFAULT_RCBABASE | 0x3598);	// !!! = 0x00000000
+  pcie_read_config32 (PCI_DEV (0, 0x1d, 0), 0x10);	// !!! = 0x00000000
+  pcie_read_config16 (PCI_DEV (0, 0x1d, 0), 0x04);	// !!! = 0x0000
+  base = 0xe8000000;	/* temporary MMIO window for 0:1d.0; BAR 0x10 is cleared again further down */
+  pcie_write_config32 (PCI_DEV (0, 0x1d, 0), 0x10, base);
+  pcie_read_config16 (PCI_DEV (0, 0x1d, 0), 0x04);	// !!! = 0x0000
+  pcie_write_config16 (PCI_DEV (0, 0x1d, 0), 0x04, 0x0006);
+  /* Bit 0 of USB_ACC_CONTROL is set around the two writes to base + 4 and cleared afterwards.  */
+  pcie_write_config16 (PCI_DEV (0, 0x1d, 0), USB_ACC_CONTROL,
+		       pcie_read_config16 (PCI_DEV (0, 0x1d, 0),
+					   USB_ACC_CONTROL) | 1);
+  read32 (base + 4);		// !!! = 0x00204008
+  write32 (base + 4, 0x00200008);
+  read32 (base + 4);		// !!! = 0x00200008
+  write32 (base + 4, 0x00200003);
+  pcie_write_config16 (PCI_DEV (0, 0x1d, 0), USB_ACC_CONTROL,
+		       pcie_read_config16 (PCI_DEV (0, 0x1d, 0),
+					   USB_ACC_CONTROL) & ~1);
+  read32 (0xe8000024);		// !!! = 0x00001000
+  write16 (0xe8000020, read16 (0xe8000020) | 2);	/* literal addresses: same window as base (base + 0x20) */
+  pcie_read_config32 (PCI_DEV (0, 0x1d, 0), 0x84);	// !!! = 0x83088e01
+  pcie_write_config32 (PCI_DEV (0, 0x1d, 0), 0x84, 0x930c8811);
+  pcie_read_config32 (PCI_DEV (0, 0x1d, 0), 0x88);	// !!! = 0x04000030
+  pcie_write_config32 (PCI_DEV (0, 0x1d, 0), 0x88, 0x24000d30);
+  pcie_read_config32 (PCI_DEV (0, 0x1d, 0), 0xf4);	// !!! = 0x00408588
+  pcie_write_config32 (PCI_DEV (0, 0x1d, 0), 0xf4, 0x80408588);
+  pcie_read_config32 (PCI_DEV (0, 0x1d, 0), 0xf4);	// !!! = 0x80408588
+  pcie_write_config32 (PCI_DEV (0, 0x1d, 0), 0xf4, 0x80808588);
+  pcie_read_config32 (PCI_DEV (0, 0x1d, 0), 0xf4);	// !!! = 0x80808588
+  pcie_write_config32 (PCI_DEV (0, 0x1d, 0), 0xf4, 0x00808588);
+  pcie_read_config32 (PCI_DEV (0, 0x1d, 0), 0xfc);	// !!! = 0x20591708
+  pcie_write_config32 (PCI_DEV (0, 0x1d, 0), 0xfc, 0x205b1708);
+  write32 (DEFAULT_RCBABASE | 0x3560,
+	   read32 (DEFAULT_RCBABASE | 0x3560) | 0x20c8000);
+  pcie_read_config16 (PCI_DEV (0, 0x1d, 0), 0x04);	// !!! = 0x0006
+  pcie_write_config16 (PCI_DEV (0, 0x1d, 0), 0x04, 0x0000);	/* command register back to 0 ... */
+  pcie_write_config32 (PCI_DEV (0, 0x1d, 0), 0x10, 0x00000000);	/* ... and the temporary BAR removed */
+  base = 0xfef00000;	/* NOTE(review): BAR 0x10 of 0:1a.0 is never written here; this assumes it already holds 0xfef00000 -- confirm */
+  pcie_read_config32 (PCI_DEV (0, 0x1a, 0), 0x10);	// !!! = 0xfef00000
+  pcie_read_config32 (PCI_DEV (0, 0x1a, 0), 0x10);	// !!! = 0xfef00000
+  pcie_read_config16 (PCI_DEV (0, 0x1a, 0), 0x04);	// !!! = 0x0002
+  pcie_read_config16 (PCI_DEV (0, 0x1a, 0), 0x04);	// !!! = 0x0002
+  pcie_write_config16 (PCI_DEV (0, 0x1a, 0), 0x04, 0x0006);
+  pcie_write_config16 (PCI_DEV (0, 0x1a, 0), 0x80,
+		       pcie_read_config16 (PCI_DEV (0, 0x1a, 0), 0x80) | 1);	/* 0x80 == USB_ACC_CONTROL */
+  read32 (base + 4);		// !!! = 0x00203006
+  write32 (base + 4, 0x00200006);
+  read32 (base + 4);		// !!! = 0x00200006
+  write32 (base + 4, 0x00200003);
+  pcie_write_config16 (PCI_DEV (0, 0x1a, 0), USB_ACC_CONTROL,
+		       pcie_read_config16 (PCI_DEV (0, 0x1a, 0),
+					   USB_ACC_CONTROL) & ~1);
+  pcie_read_config32 (PCI_DEV (0, 0x1a, 0), 0x84);	// !!! = 0x83088e01
+  pcie_write_config32 (PCI_DEV (0, 0x1a, 0), 0x84, 0x930c8811);
+  pcie_read_config32 (PCI_DEV (0, 0x1a, 0), 0x88);	// !!! = 0x04000030
+  pcie_write_config32 (PCI_DEV (0, 0x1a, 0), 0x88, 0x24000d30);
+  pcie_read_config32 (PCI_DEV (0, 0x1a, 0), 0xf4);	// !!! = 0x00408588
+  pcie_write_config32 (PCI_DEV (0, 0x1a, 0), 0xf4, 0x80408588);
+  pcie_read_config32 (PCI_DEV (0, 0x1a, 0), 0xf4);	// !!! = 0x80408588
+  pcie_write_config32 (PCI_DEV (0, 0x1a, 0), 0xf4, 0x80808588);
+  pcie_read_config32 (PCI_DEV (0, 0x1a, 0), 0xf4);	// !!! = 0x80808588
+  pcie_write_config32 (PCI_DEV (0, 0x1a, 0), 0xf4, 0x00808588);
+  pcie_read_config32 (PCI_DEV (0, 0x1a, 0), 0xfc);	// !!! = 0x20591708
+  pcie_write_config32 (PCI_DEV (0, 0x1a, 0), 0xfc, 0x205b1708);
+  write32 (DEFAULT_RCBABASE | 0x3560,
+	   read32 (DEFAULT_RCBABASE | 0x3560) | 0x20c8000);
+  pcie_write_config16 (PCI_DEV (0, 0x1a, 0), 0x04, 0x0002);	/* unlike 0:1d.0, the command register is left at 0x0002 */
+
+  pcie_read_config16 (SOUTHBRIDGE, 0x02);	// !!! = 0x1e55
+  outw (inw (DEFAULT_PMBASE | 0x003c) | 2, DEFAULT_PMBASE | 0x003c);
+  u16 reg_359c = 0x150;	/* wanted state of bits 0-13 of RCBA 0x359c: bits 4, 6 and 8 set, the rest clear */
+  int i;
+  for (i = 0; i < 14; i++)	/* one read-modify-write per bit, lowest bit first */
+    if (reg_359c & (1 << i))
+      write16 (DEFAULT_RCBABASE | 0x359c,
+	       read16 (DEFAULT_RCBABASE | 0x359c) | (1 << i));
+    else
+      write16 (DEFAULT_RCBABASE | 0x359c,
+	       read16 (DEFAULT_RCBABASE | 0x359c) & ~(1 << i));
+  pcie_read_config8 (SOUTHBRIDGE, 0x08);	// !!! = 0x04
+  pcie_read_config16 (SOUTHBRIDGE, 0x02);	// !!! = 0x1e55
+  pcie_read_config32 (PCI_DEV (0, 0x14, 0), 0xe4);	// !!! = 0x00000000
+  pcie_write_config32 (PCI_DEV (0, 0x14, 0), 0xe4, 0x00000000);
+  outw (0x0000, DEFAULT_PMBASE | 0x003c);	/* overwrites the whole word, including bit 1 set above */
+  write32 (DEFAULT_RCBABASE | 0x3418, 0x16001fe0);
+  read32 (DEFAULT_RCBABASE | 0x3418);	// !!! = 0x16001fe0
+}
+/* Points config registers 0x40/0x44 of PCI device 0:1f.6 at 0xfed08000, programs fixed values into that MMIO window and RCBA 0x38b0, then writes register 0x40 back to 0.  */
+static void
+init_thermal (void)
+{
+  /* OK { */
+  pcie_write_config32 (PCI_DEV (0, 0x1f, 6), 0x40, 0xfed08000);
+  pcie_write_config32 (PCI_DEV (0, 0x1f, 6), 0x44, 0x00000000);
+  pcie_read_config32 (PCI_DEV (0, 0x1f, 6), 0x40);	// !!! = 0xfed08004
+  pcie_write_config32 (PCI_DEV (0, 0x1f, 6), 0x40, 0xfed08005);	/* bit 0 presumably enables the window -- TODO confirm */
+  write16 (0xfed08004, 0x3a2b);
+  write8 (0xfed0800c, 0xff);
+  write8 (0xfed0800d, 0x00);
+  write8 (0xfed0800e, 0x40);
+  write8 (0xfed08082, 0x00);
+  write8 (0xfed08001, 0xba);
+  msr_t msr;
+  msr = rdmsr (MSR_TEMPERATURE_TARGET);	// !!! = 0x0000000000691200
+  write16 (0xfed08012, ((msr.lo >> 16) & 0xff) << 6);	/* bits 16-23 of the MSR, multiplied by 64 */
+  write16 (0xfed08016, 0x808c);
+
+  /* RCBA 0x38b0: bits 2-14 are replaced by 0x403c, all other bits are preserved.  */
+  write32 (DEFAULT_RCBABASE | 0x38b0,
+	   (read32 (DEFAULT_RCBABASE | 0x38b0) & 0xffff8003) | 0x403c);
+
+  write16 (0xfed08014, 0xde87);
+  /* } OK */
+  if (read32 (DEFAULT_RCBABASE | 0x38b4) & 0x4000)	// !!! = 0x0000437e
+    write16 (0xfed0801a, 0x0000);
+  else
+    {
+      /* ? */
+    }
+  pcie_read_config32 (PCI_DEV (0, 0x1f, 6), 0x40);	// !!! = 0xfed08005
+  pcie_write_config32 (PCI_DEV (0, 0x1f, 6), 0x40, 0xfed08004);	/* bit 0 cleared first ... */
+  pcie_write_config32 (PCI_DEV (0, 0x1f, 6), 0x40, 0x00000000);	/* ... then the address itself is removed */
+}
+/* Early chipset setup with fixed register values: SOUTHBRIDGE base registers, RCBA and indirect (0x2330/0x2338 window) registers, init_thermal (), init_usb (), then NORTHBRIDGE, DMIBAR and further RCBA registers.  Discarded reads and the "// !!! = x" notes presumably replay and record the author's X230 trace.  */
+void
+pch_init (void)
+{
+  int i;
+  /* Set RCBA and PMBASE on SOUTHBRIDGE (0:1f.0) to the DEFAULT_* values with bit 0 set, then OR ACPI_EN into ACPI_CNTL.  */
+  pcie_read_config16 (SOUTHBRIDGE, PCI_DEVICE_ID);	// !!! = 0x1e55
+  pcie_read_config32 (SOUTHBRIDGE, RCBA);	// !!! = 0xfed1c001
+  pcie_write_config32 (SOUTHBRIDGE, RCBA, DEFAULT_RCBA | 1);
+  pcie_read_config32 (SOUTHBRIDGE, PMBASE);	// !!! = 0x00000501
+  pcie_write_config32 (SOUTHBRIDGE, PMBASE, DEFAULT_PMBASE | 1);
+  pcie_write_config8 (SOUTHBRIDGE, ACPI_CNTL,
+		      pcie_read_config8 (SOUTHBRIDGE, ACPI_CNTL) | ACPI_EN);
+
+  /* Undocumented.  */
+  pcie_write_config8 (SOUTHBRIDGE, 0xa6,
+		      pcie_read_config8 (SOUTHBRIDGE, 0xa6) | 2);
+  /* GPIO base and control register.  */
+  pcie_read_config32 (SOUTHBRIDGE, GPIO_BASE);	// !!! = 0x00000481
+  pcie_write_config32 (SOUTHBRIDGE, GPIO_BASE, DEFAULT_GPIOBASE | 1);
+  pcie_read_config8 (SOUTHBRIDGE, GPIO_CNTL);	// !!! = 0x10
+  pcie_write_config8 (SOUTHBRIDGE, GPIO_CNTL, 0x10);
+  pcie_read_config16 (SOUTHBRIDGE, PCI_DEVICE_ID);	// !!! = 0x1e55
+  /* Direct RCBA register writes.  */
+  write32 (DEFAULT_RCBABASE | 0x2088, 0x00109000);
+  read32 (DEFAULT_RCBABASE | 0x20ac);	// !!! = 0x00000000
+  write32 (DEFAULT_RCBABASE | 0x20ac, 0x40000000);
+  write32 (DEFAULT_RCBABASE | 0x100c, 0x01110000);
+  write8 (DEFAULT_RCBABASE | 0x2340, 0x1b);
+  read32 (DEFAULT_RCBABASE | 0x2314);	// !!! = 0x0a080000
+  write32 (DEFAULT_RCBABASE | 0x2314, 0x0a280000);
+  read32 (DEFAULT_RCBABASE | 0x2310);	// !!! = 0xc809605b
+  write32 (DEFAULT_RCBABASE | 0x2310, 0xa809605b);
+  write32 (DEFAULT_RCBABASE | 0x2324, 0x00854c74);
+  read8 (DEFAULT_RCBABASE | 0x0400);	// !!! = 0x00
+  read32 (DEFAULT_RCBABASE | 0x2310);	// !!! = 0xa809605b
+  write32 (DEFAULT_RCBABASE | 0x2310, 0xa809605b);
+  read32 (DEFAULT_RCBABASE | 0x2310);	// !!! = 0xa809605b
+  write32 (DEFAULT_RCBABASE | 0x2310, 0xa809605b);
+  /* Indirect writes: `and' is 0 in every call, so each one overwrites the whole value.  NOTE(review): the return codes (timeout/error) are ignored throughout.  */
+  and_or_2338 (0xea007f62, 0, 0x00590133);
+  and_or_2338 (0xec007f62, 0, 0x00590133);
+  and_or_2338 (0xec007f64, 0, 0x59555588);
+  and_or_2338 (0xea0040b9, 0, 0x0001051c);
+  and_or_2338 (0xeb0040a1, 0, 0x800084ff);
+  and_or_2338 (0xec0040a1, 0, 0x800084ff);
+  and_or_2338 (0xea004001, 0, 0x00008400);
+  and_or_2338 (0xeb004002, 0, 0x40201758);
+  and_or_2338 (0xec004002, 0, 0x40201758);
+  and_or_2338 (0xea004002, 0, 0x00601758);
+  and_or_2338 (0xea0040a1, 0, 0x810084ff);
+  and_or_2338 (0xeb0040b1, 0, 0x0001c598);
+  and_or_2338 (0xec0040b1, 0, 0x0001c598);
+  and_or_2338 (0xeb0040b6, 0, 0x0001c598);
+  and_or_2338 (0xea0000a9, 0, 0x80ff969f);
+  and_or_2338 (0xea0001a9, 0, 0x80ff969f);
+  and_or_2338 (0xeb0040b2, 0, 0x0001c396);
+  and_or_2338 (0xeb0040b3, 0, 0x0001c396);
+  and_or_2338 (0xec0040b2, 0, 0x0001c396);
+  and_or_2338 (0xea0001a9, 0, 0x80ff94ff);
+  and_or_2338 (0xea000151, 0, 0x0088037f);
+  and_or_2338 (0xea0000a9, 0, 0x80ff94ff);
+  and_or_2338 (0xea000051, 0, 0x0088037f);
+
+  u32 reg32;
+  read_2338 (0xea007f05, &reg32);	/* the value read into reg32 is never used */
+
+  and_or_2338 (0xea007f05, 0, 0x00010642);
+  and_or_2338 (0xea0040b7, 0, 0x0001c91c);
+  and_or_2338 (0xea0040b8, 0, 0x0001c91c);
+  and_or_2338 (0xeb0040a1, 0, 0x820084ff);
+  and_or_2338 (0xec0040a1, 0, 0x820084ff);
+  and_or_2338 (0xea007f0a, 0, 0xc2480000);
+
+
+  pcie_read_config8 (SOUTHBRIDGE, 0x08);	// !!! = 0x04
+  pcie_read_config16 (SOUTHBRIDGE, 0x02);	// !!! = 0x1e55
+
+  and_or_2338 (0xec00404d, 0, 0x1ff177f);
+  and_or_2338 (0xec000084, 0, 0x5a600000);
+  and_or_2338 (0xec000184, 0, 0x5a600000);
+  and_or_2338 (0xec000284, 0, 0x5a600000);
+  and_or_2338 (0xec000384, 0, 0x5a600000);
+  and_or_2338 (0xec000094, 0, 0x000f0501);
+  and_or_2338 (0xec000194, 0, 0x000f0501);
+  and_or_2338 (0xec000294, 0, 0x000f0501);
+  and_or_2338 (0xec000394, 0, 0x000f0501);
+  and_or_2338 (0xec000096, 0, 0x00000001);
+  and_or_2338 (0xec000196, 0, 0x00000001);
+  and_or_2338 (0xec000296, 0, 0x00000001);
+  and_or_2338 (0xec000396, 0, 0x00000001);
+  and_or_2338 (0xec000001, 0, 0x00008c08);
+  and_or_2338 (0xec000101, 0, 0x00008c08);
+  and_or_2338 (0xec000201, 0, 0x00008c08);
+  and_or_2338 (0xec000301, 0, 0x00008c08);
+  and_or_2338 (0xec0040b5, 0, 0x0001c518);
+  and_or_2338 (0xec000087, 0, 0x06077597);
+  and_or_2338 (0xec000187, 0, 0x06077597);
+  and_or_2338 (0xec000287, 0, 0x06077597);
+  and_or_2338 (0xec000387, 0, 0x06077597);
+  and_or_2338 (0xea000050, 0, 0x00bb0157);
+  and_or_2338 (0xea000150, 0, 0x00bb0157);
+  and_or_2338 (0xec007f60, 0, 0x77777d77);
+  and_or_2338 (0xea00008d, 0, 0x01320000);
+  and_or_2338 (0xea00018d, 0, 0x01320000);
+
+  read16 (DEFAULT_RCBABASE | 0x0400);	// !!! = 0x0b00
+  read32 (DEFAULT_RCBABASE | 0x0400);	// !!! = 0x00000b00
+
+  and_or_2338 (0xec0007b2, 0, 0x04514b5e);
+  and_or_2338 (0xec00078c, 0, 0x40000200);
+  and_or_2338 (0xec000780, 0, 0x02000020);
+
+  read8 (DEFAULT_RCBABASE | 0x3414);	// !!! = 0x00
+  pcie_read_config16 (SOUTHBRIDGE, 0x02);	// !!! = 0x1e55
+
+  read32 (DEFAULT_RCBABASE | 0x3598);	// !!! = 0x00000001
+  read32 (DEFAULT_RCBABASE | 0x3598);	// !!! = 0x00000001
+  write32 (DEFAULT_RCBABASE | 0x3598, 0x00000000);
+  read32 (DEFAULT_RCBABASE | 0x3598);	// !!! = 0x00000000
+  pcie_read_config32 (PCI_DEV (0, 0x1d, 0), 0x88);	// !!! = 0x04000030
+  pcie_write_config32 (PCI_DEV (0, 0x1d, 0), 0x88, 0x04000030);
+  pcie_read_config32 (PCI_DEV (0, 0x1a, 0), 0x88);	// !!! = 0x04000030
+  pcie_write_config32 (PCI_DEV (0, 0x1a, 0), 0x88, 0x04000030);
+
+  read32 (DEFAULT_RCBABASE | 0x3410);	// !!! = 0x00000c20
+  write32 (DEFAULT_RCBABASE | 0x3410, 0x00000c20);
+  read32 (DEFAULT_RCBABASE | 0x3410);	// !!! = 0x00000c20
+  /* Disable SATA2. */
+  write32 (DEFAULT_RCBABASE | 0x3418,
+	   read32 (DEFAULT_RCBABASE | 0x3418) | 0x02000000);
+  read32 (DEFAULT_RCBABASE | 0x3418);	// !!! = 0x06000000
+
+  /* Configure thermal */
+  init_thermal ();
+
+  read8 (DEFAULT_RCBABASE | 0x31fe);	// !!! = 0x00
+  read16 (DEFAULT_RCBABASE | 0x31fe);	// !!! = 0x0000
+  write16 (DEFAULT_RCBABASE | 0x31fe, 0x0100);
+  read16 (DEFAULT_RCBABASE | 0x31fe);	// !!! = 0x0100
+  read8 (DEFAULT_RCBABASE | 0x31fe);	// !!! = 0x00
+  write8 (0xfec00000, 0x00);
+  read32 (0xfec00010);		// !!! = 0x00000000
+  pcie_write_config32 (PCI_DEV (0, 0x1f, 3), 0x20, 0x00000400);
+  pcie_read_config8 (PCI_DEV (0, 0x1f, 3), 0x04);	// !!! = 0x01
+  pcie_write_config8 (PCI_DEV (0, 0x1f, 3), 0x04, 0x01);
+  pcie_read_config8 (PCI_DEV (0, 0x1f, 3), 0x40);	// !!! = 0x01
+  pcie_write_config8 (PCI_DEV (0, 0x1f, 3), 0x40, 0x09);
+  pcie_read_config8 (PCI_DEV (0, 0x1f, 3), 0x40);	// !!! = 0x01
+  pcie_write_config8 (PCI_DEV (0, 0x1f, 3), 0x40, 0x01);
+  outb (0xff, 0x0400);	/* I/O port 0x400 matches the value written to register 0x20 of 0:1f.3 above */
+  init_usb ();
+  pcie_read_config16 (NORTHBRIDGE, 0xe4);	// !!! = 0x619b
+  pcie_read_config16 (NORTHBRIDGE, 0x02);	// !!! = 0x0154
+  cpuid_ext (0x1, 0x0);		// !!! = 0x00000000000306a9
+  pcie_write_config32 (NORTHBRIDGE, 0x48, 0xfed10001);	/* 0x48/0x68/0x40 presumably hold the MCHBAR/DMIBAR/EPBAR bases plus an enable bit -- TODO confirm */
+  pcie_write_config32 (NORTHBRIDGE, 0x4c, 0x00000000);
+  pcie_write_config32 (NORTHBRIDGE, 0x68, 0xfed18001);
+  pcie_write_config32 (NORTHBRIDGE, 0x6c, 0x00000000);
+  pcie_write_config32 (NORTHBRIDGE, 0x40, 0xfed19001);
+  pcie_write_config32 (NORTHBRIDGE, 0x44, 0x00000000);
+  read32 (DEFAULT_MCHBAR | 0x5f00);	// !!! = 0x0000270f
+  write32 (DEFAULT_MCHBAR | 0x5f00, 0x0000270f);
+  cpuid_ext (0x1, 0x0);		// !!! = 0x00000000000306a9
+  pcie_read_config16 (NORTHBRIDGE, 0x02);	// !!! = 0x0154
+  pcie_read_config32 (NORTHBRIDGE, 0x68);	// !!! = 0xfed18001
+  pcie_read_config32 (NORTHBRIDGE, 0x6c);	// !!! = 0x00000000
+  write32 (DEFAULT_DMIBAR | 0x0914,
+	   read32 (DEFAULT_DMIBAR | 0x0914) | 0x80000000);
+  write32 (DEFAULT_DMIBAR | 0x0934,
+	   read32 (DEFAULT_DMIBAR | 0x0934) | 0x80000000);
+  for (i = 0; i < 4; i++)	/* four register groups, 0x10 bytes apart, starting at DMIBAR 0x0a00 */
+    {
+      write32 (DEFAULT_DMIBAR | 0x0a00 | (i << 4),
+	       read32 (DEFAULT_DMIBAR | 0x0a00 | (i << 4)) & 0xf3ffffff);
+      write32 (DEFAULT_DMIBAR | 0x0a04 | (i << 4),
+	       read32 (DEFAULT_DMIBAR | 0x0a04 | (i << 4)) | 0x800);
+    }
+  write32 (DEFAULT_DMIBAR | 0x0c30, (read32 (DEFAULT_DMIBAR | 0x0c30)
+				     & 0xfffffff) | 0x40000000);	/* 0xfffffff is 0x0fffffff: bits 28-31 are cleared, then bit 30 is set */
+  for (i = 0; i < 2; i++)	/* two register groups, 0x20 bytes apart, starting at DMIBAR 0x0900 */
+    {
+      write32 (DEFAULT_DMIBAR | 0x0904 | (i << 5),
+	       read32 (DEFAULT_DMIBAR | 0x0904 | (i << 5)) & 0xfe3fffff);
+      write32 (DEFAULT_DMIBAR | 0x090c | (i << 5),
+	       read32 (DEFAULT_DMIBAR | 0x090c | (i << 5)) & 0xfff1ffff);
+    }
+  write32 (DEFAULT_DMIBAR | 0x090c,
+	   read32 (DEFAULT_DMIBAR | 0x090c) & 0xfe1fffff);
+  write32 (DEFAULT_DMIBAR | 0x092c,
+	   read32 (DEFAULT_DMIBAR | 0x092c) & 0xfe1fffff);
+  read32 (DEFAULT_DMIBAR | 0x0904);	// !!! = 0x7a1842ec
+  write32 (DEFAULT_DMIBAR | 0x0904, 0x7a1842ec);
+  read32 (DEFAULT_DMIBAR | 0x090c);	// !!! = 0x00000208
+  write32 (DEFAULT_DMIBAR | 0x090c, 0x00000128);
+  read32 (DEFAULT_DMIBAR | 0x0924);	// !!! = 0x7a1842ec
+  write32 (DEFAULT_DMIBAR | 0x0924, 0x7a1842ec);
+  read32 (DEFAULT_DMIBAR | 0x092c);	// !!! = 0x00000208
+  write32 (DEFAULT_DMIBAR | 0x092c, 0x00000128);
+  read32 (DEFAULT_DMIBAR | 0x0700);	// !!! = 0x46139008
+  write32 (DEFAULT_DMIBAR | 0x0700, 0x46139008);
+  read32 (DEFAULT_DMIBAR | 0x0720);	// !!! = 0x46139008
+  write32 (DEFAULT_DMIBAR | 0x0720, 0x46139008);
+  read32 (DEFAULT_DMIBAR | 0x0c04);	// !!! = 0x2e680008
+  write32 (DEFAULT_DMIBAR | 0x0c04, 0x2e680008);
+  read32 (DEFAULT_DMIBAR | 0x0904);	// !!! = 0x7a1842ec
+  write32 (DEFAULT_DMIBAR | 0x0904, 0x3a1842ec);
+  read32 (DEFAULT_DMIBAR | 0x0924);	// !!! = 0x7a1842ec
+  write32 (DEFAULT_DMIBAR | 0x0924, 0x3a1842ec);
+  read32 (DEFAULT_DMIBAR | 0x0910);	// !!! = 0x00006300
+  write32 (DEFAULT_DMIBAR | 0x0910, 0x00004300);
+  read32 (DEFAULT_DMIBAR | 0x0930);	// !!! = 0x00006300
+  write32 (DEFAULT_DMIBAR | 0x0930, 0x00004300);
+  read32 (DEFAULT_DMIBAR | 0x0a00);	// !!! = 0x03042010
+  write32 (DEFAULT_DMIBAR | 0x0a00, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0a10);	// !!! = 0x03042010
+  write32 (DEFAULT_DMIBAR | 0x0a10, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0a20);	// !!! = 0x03042010
+  write32 (DEFAULT_DMIBAR | 0x0a20, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0a30);	// !!! = 0x03042010
+  write32 (DEFAULT_DMIBAR | 0x0a30, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0c00);	// !!! = 0x29700c08
+  write32 (DEFAULT_DMIBAR | 0x0c00, 0x29700c08);
+  read32 (DEFAULT_DMIBAR | 0x0a04);	// !!! = 0x0c0708f0
+  write32 (DEFAULT_DMIBAR | 0x0a04, 0x0c0718f0);
+  read32 (DEFAULT_DMIBAR | 0x0a14);	// !!! = 0x0c0708f0
+  write32 (DEFAULT_DMIBAR | 0x0a14, 0x0c0718f0);
+  read32 (DEFAULT_DMIBAR | 0x0a24);	// !!! = 0x0c0708f0
+  write32 (DEFAULT_DMIBAR | 0x0a24, 0x0c0718f0);
+  read32 (DEFAULT_DMIBAR | 0x0a34);	// !!! = 0x0c0708f0
+  write32 (DEFAULT_DMIBAR | 0x0a34, 0x0c0718f0);
+  read32 (DEFAULT_DMIBAR | 0x0900);	// !!! = 0x50000000
+  write32 (DEFAULT_DMIBAR | 0x0900, 0x50000000);
+  read32 (DEFAULT_DMIBAR | 0x0920);	// !!! = 0x50000000
+  write32 (DEFAULT_DMIBAR | 0x0920, 0x50000000);
+  read32 (DEFAULT_DMIBAR | 0x0908);	// !!! = 0x51ffffff
+  write32 (DEFAULT_DMIBAR | 0x0908, 0x51ffffff);
+  read32 (DEFAULT_DMIBAR | 0x0928);	// !!! = 0x51ffffff
+  write32 (DEFAULT_DMIBAR | 0x0928, 0x51ffffff);
+  read32 (DEFAULT_DMIBAR | 0x0a00);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a00, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0a10);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a10, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0a20);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a20, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0a30);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a30, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0700);	// !!! = 0x46139008
+  write32 (DEFAULT_DMIBAR | 0x0700, 0x46139008);
+  read32 (DEFAULT_DMIBAR | 0x0720);	// !!! = 0x46139008
+  write32 (DEFAULT_DMIBAR | 0x0720, 0x46139008);
+  read32 (DEFAULT_DMIBAR | 0x0904);	// !!! = 0x3a1842ec
+  write32 (DEFAULT_DMIBAR | 0x0904, 0x3a1846ec);
+  read32 (DEFAULT_DMIBAR | 0x0924);	// !!! = 0x3a1842ec
+  write32 (DEFAULT_DMIBAR | 0x0924, 0x3a1846ec);
+  read32 (DEFAULT_DMIBAR | 0x0a00);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a00, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0a10);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a10, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0a20);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a20, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0a30);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a30, 0x03042018);
+  read32 (DEFAULT_DMIBAR | 0x0908);	// !!! = 0x51ffffff
+  write32 (DEFAULT_DMIBAR | 0x0908, 0x51ffffff);
+  read32 (DEFAULT_DMIBAR | 0x0928);	// !!! = 0x51ffffff
+  write32 (DEFAULT_DMIBAR | 0x0928, 0x51ffffff);
+  read32 (DEFAULT_DMIBAR | 0x0c00);	// !!! = 0x29700c08
+  write32 (DEFAULT_DMIBAR | 0x0c00, 0x29700c08);
+  read32 (DEFAULT_DMIBAR | 0x0c0c);	// !!! = 0x16063400
+  write32 (DEFAULT_DMIBAR | 0x0c0c, 0x00063400);
+  read32 (DEFAULT_DMIBAR | 0x0700);	// !!! = 0x46139008
+  write32 (DEFAULT_DMIBAR | 0x0700, 0x46339008);
+  read32 (DEFAULT_DMIBAR | 0x0720);	// !!! = 0x46139008
+  write32 (DEFAULT_DMIBAR | 0x0720, 0x46339008);
+  read32 (DEFAULT_DMIBAR | 0x0700);	// !!! = 0x46339008
+  write32 (DEFAULT_DMIBAR | 0x0700, 0x45339008);
+  read32 (DEFAULT_DMIBAR | 0x0720);	// !!! = 0x46339008
+  write32 (DEFAULT_DMIBAR | 0x0720, 0x45339008);
+  read32 (DEFAULT_DMIBAR | 0x0700);	// !!! = 0x45339008
+  write32 (DEFAULT_DMIBAR | 0x0700, 0x453b9008);
+  read32 (DEFAULT_DMIBAR | 0x0720);	// !!! = 0x45339008
+  write32 (DEFAULT_DMIBAR | 0x0720, 0x453b9008);
+  read32 (DEFAULT_DMIBAR | 0x0700);	// !!! = 0x453b9008
+  write32 (DEFAULT_DMIBAR | 0x0700, 0x45bb9008);
+  read32 (DEFAULT_DMIBAR | 0x0720);	// !!! = 0x453b9008
+  write32 (DEFAULT_DMIBAR | 0x0720, 0x45bb9008);
+  read32 (DEFAULT_DMIBAR | 0x0700);	// !!! = 0x45bb9008
+  write32 (DEFAULT_DMIBAR | 0x0700, 0x45fb9008);
+  read32 (DEFAULT_DMIBAR | 0x0720);	// !!! = 0x45bb9008
+  write32 (DEFAULT_DMIBAR | 0x0720, 0x45fb9008);
+  read32 (DEFAULT_DMIBAR | 0x0914);	// !!! = 0x9021a080
+  write32 (DEFAULT_DMIBAR | 0x0914, 0x9021a280);
+  read32 (DEFAULT_DMIBAR | 0x0934);	// !!! = 0x9021a080
+  write32 (DEFAULT_DMIBAR | 0x0934, 0x9021a280);
+  read32 (DEFAULT_DMIBAR | 0x0914);	// !!! = 0x9021a280
+  write32 (DEFAULT_DMIBAR | 0x0914, 0x9821a280);
+  read32 (DEFAULT_DMIBAR | 0x0934);	// !!! = 0x9021a280
+  write32 (DEFAULT_DMIBAR | 0x0934, 0x9821a280);
+  read32 (DEFAULT_DMIBAR | 0x0a00);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a00, 0x03242018);
+  read32 (DEFAULT_DMIBAR | 0x0a10);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a10, 0x03242018);
+  read32 (DEFAULT_DMIBAR | 0x0a20);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a20, 0x03242018);
+  read32 (DEFAULT_DMIBAR | 0x0a30);	// !!! = 0x03042018
+  write32 (DEFAULT_DMIBAR | 0x0a30, 0x03242018);
+  read32 (DEFAULT_DMIBAR | 0x0258);	// !!! = 0x40000600
+  write32 (DEFAULT_DMIBAR | 0x0258, 0x60000600);
+  read32 (DEFAULT_DMIBAR | 0x0904);	// !!! = 0x3a1846ec
+  write32 (DEFAULT_DMIBAR | 0x0904, 0x2a1846ec);
+  read32 (DEFAULT_DMIBAR | 0x0914);	// !!! = 0x9821a280
+  write32 (DEFAULT_DMIBAR | 0x0914, 0x98200280);
+  read32 (DEFAULT_DMIBAR | 0x0924);	// !!! = 0x3a1846ec
+  write32 (DEFAULT_DMIBAR | 0x0924, 0x2a1846ec);
+  read32 (DEFAULT_DMIBAR | 0x0934);	// !!! = 0x9821a280
+  write32 (DEFAULT_DMIBAR | 0x0934, 0x98200280);
+  read32 (DEFAULT_DMIBAR | 0x022c);	// !!! = 0x00c26460
+  write32 (DEFAULT_DMIBAR | 0x022c, 0x00c2403c);
+  read8 (DEFAULT_RCBABASE | 0x21a4);	// !!! = 0x42
+  pcie_read_config32 (NORTHBRIDGE, 0xe4);	// !!! = 0xe200619b
+  pcie_read_config32 (SOUTHBRIDGE, 0xf0);	// !!! = 0xfed1c001
+  read32 (DEFAULT_RCBABASE | 0x21a4);	// !!! = 0x00012c42
+  read32 (DEFAULT_RCBABASE | 0x2340);	// !!! = 0x0013001b
+  write32 (DEFAULT_RCBABASE | 0x2340, 0x003a001b);
+  read8 (DEFAULT_RCBABASE | 0x21b0);	// !!! = 0x01
+  write8 (DEFAULT_RCBABASE | 0x21b0, 0x02);
+  read32 (DEFAULT_DMIBAR | 0x0084);	// !!! = 0x0041ac41
+  write32 (DEFAULT_DMIBAR | 0x0084, 0x0041ac42);
+  read8 (DEFAULT_DMIBAR | 0x0088);	// !!! = 0x00
+  write8 (DEFAULT_DMIBAR | 0x0088, 0x20);
+  read16 (DEFAULT_DMIBAR | 0x008a);	// !!! = 0x0041
+  read8 (DEFAULT_DMIBAR | 0x0088);	// !!! = 0x00
+  write8 (DEFAULT_DMIBAR | 0x0088, 0x20);
+  read16 (DEFAULT_DMIBAR | 0x008a);	// !!! = 0x0042
+  read16 (DEFAULT_DMIBAR | 0x008a);	// !!! = 0x0042
+  pcie_read_config16 (NORTHBRIDGE, 0x50);	// !!! = 0x0208
+  pcie_write_config16 (NORTHBRIDGE, 0x50, 0x020a);
+  pcie_read_config16 (PCI_DEV (0, 0x1e, 0), 0x00);	// !!! = 0x8086
+  pcie_write_config32 (PCI_DEV (0, 0x1e, 0), 0x18, 0x00ff0200);
+  pcie_read_config8 (PCI_DEV (0, 0x1e, 0), 0x19);	// !!! = 0x02
+  pcie_write_config16 (PCI_DEV (2, 0x0, 0), 0x00, 0x0000);	/* NOTE(review): config write to offset 0 of a bus-2 device, presumably behind bridge 0:1e.0 -- purpose unclear, confirm */
+
+  pcie_write_config8 (PCI_DEV (0, 0x1e, 0), 0x1a, 0x02);
+  pcie_write_config32 (PCI_DEV (0, 0x1e, 0), 0x18, 0x00000000);	/* register 0x18 of 0:1e.0 is zeroed again */
+
+  pcie_read_config16 (NORTHBRIDGE, 0x54);	// !!! = 0x0011
+  pcie_read_config16 (NORTHBRIDGE, 0x50);	// !!! = 0x020a
+  pcie_write_config16 (NORTHBRIDGE, 0x50, 0x0202);
+  pcie_read_config8 (NORTHBRIDGE, 0x50);	// !!! = 0x02
+  pcie_write_config8 (NORTHBRIDGE, 0x50, 0x0a);
+  pcie_read_config16 (NORTHBRIDGE, 0x50);	// !!! = 0x020a
+  pcie_write_config16 (NORTHBRIDGE, 0x50, 0x020a);
+  pcie_read_config8 (GFX_DEV, 0x62);	// !!! = 0x02
+  pcie_write_config8 (GFX_DEV, 0x62, 0x02);
+  pcie_read_config16 (NORTHBRIDGE, 0x50);	// !!! = 0x020a
+  pcie_write_config16 (NORTHBRIDGE, 0x50, 0x0208);
+  pcie_read_config32 (NORTHBRIDGE, 0x68);	// !!! = 0xfed18001
+  pcie_read_config32 (NORTHBRIDGE, 0x6c);	// !!! = 0x00000000
+  read32 (DEFAULT_DMIBAR | 0x0014);	// !!! = 0x8000007f
+  write32 (DEFAULT_DMIBAR | 0x0014, 0x80000019);
+  read32 (DEFAULT_DMIBAR | 0x0020);	// !!! = 0x01000000
+  write32 (DEFAULT_DMIBAR | 0x0020, 0x81000022);
+  read32 (DEFAULT_DMIBAR | 0x002c);	// !!! = 0x02000000
+  write32 (DEFAULT_DMIBAR | 0x002c, 0x82000044);
+  read32 (DEFAULT_DMIBAR | 0x0038);	// !!! = 0x07000080
+  write32 (DEFAULT_DMIBAR | 0x0038, 0x87000080);
+  read8 (DEFAULT_DMIBAR | 0x0004);	// !!! = 0x00
+  write8 (DEFAULT_DMIBAR | 0x0004, 0x01);
+  pcie_read_config32 (SOUTHBRIDGE, 0xf0);	// !!! = 0xfed1c001
+  read32 (DEFAULT_RCBABASE | 0x0050);	// !!! = 0x01200654
+  write32 (DEFAULT_RCBABASE | 0x0050, 0x01200654);
+  read32 (DEFAULT_RCBABASE | 0x0050);	// !!! = 0x01200654
+  write32 (DEFAULT_RCBABASE | 0x0050, 0x012a0654);
+  read32 (DEFAULT_RCBABASE | 0x0050);	// !!! = 0x012a0654
+  read8 (DEFAULT_RCBABASE | 0x1114);	// !!! = 0x00
+  write8 (DEFAULT_RCBABASE | 0x1114, 0x05);
+  read32 (DEFAULT_RCBABASE | 0x2014);	// !!! = 0x80000011
+  write32 (DEFAULT_RCBABASE | 0x2014, 0x80000019);
+  read32 (DEFAULT_RCBABASE | 0x2020);	// !!! = 0x00000000
+  write32 (DEFAULT_RCBABASE | 0x2020, 0x81000022);
+  read32 (DEFAULT_RCBABASE | 0x2020);	// !!! = 0x81000022
+  read32 (DEFAULT_RCBABASE | 0x2030);	// !!! = 0x00000000
+  write32 (DEFAULT_RCBABASE | 0x2030, 0x82000044);
+  read32 (DEFAULT_RCBABASE | 0x2030);	// !!! = 0x82000044
+  read32 (DEFAULT_RCBABASE | 0x2040);	// !!! = 0x00000000
+  write32 (DEFAULT_RCBABASE | 0x2040, 0x87000080);
+  read32 (DEFAULT_RCBABASE | 0x0050);	// !!! = 0x012a0654
+  write32 (DEFAULT_RCBABASE | 0x0050, 0x812a0654);
+  read32 (DEFAULT_RCBABASE | 0x0050);	// !!! = 0x812a0654
+  read16 (DEFAULT_RCBABASE | 0x201a);	// !!! = 0x0000
+  read16 (DEFAULT_RCBABASE | 0x2026);	// !!! = 0x0000
+  read16 (DEFAULT_RCBABASE | 0x2036);	// !!! = 0x0000
+  read16 (DEFAULT_RCBABASE | 0x2046);	// !!! = 0x0000
+  read16 (DEFAULT_DMIBAR | 0x001a);	// !!! = 0x0000
+  read16 (DEFAULT_DMIBAR | 0x0026);	// !!! = 0x0000
+  read16 (DEFAULT_DMIBAR | 0x0032);	// !!! = 0x0000
+  read16 (DEFAULT_DMIBAR | 0x003e);	// !!! = 0x0000
+}
diff --git a/src/northbridge/intel/sandybridge/raminit.c b/src/northbridge/intel/sandybridge/raminit.c
index 61e1545..7f2beda 100644
--- a/src/northbridge/intel/sandybridge/raminit.c
+++ b/src/northbridge/intel/sandybridge/raminit.c
@@ -1,7 +1,8 @@
 /*
  * This file is part of the coreboot project.
  *
- * Copyright (C) 2011 Google Inc.
+ * Copyright (C) 2014 Damien Zammit <damien at zamaudio.com>
+ * Copyright (C) 2014 Vladimir Serbinenko <phcoder at gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -31,128 +32,151 @@
 #include "raminit.h"
 #include "pei_data.h"
 #include "sandybridge.h"
+#include <delay.h>
 
 /* Management Engine is in the southbridge */
 #include "southbridge/intel/bd82x6x/me.h"
+#include "southbridge/intel/bd82x6x/pch.h"
+#include "southbridge/intel/bd82x6x/smbus.h"
+#include <cpu/x86/msr.h>
+#include <cpu/cpu.h>
+#include "cpu/intel/model_2065x/model_2065x.h"
+
 #if CONFIG_CHROMEOS
 #include <vendorcode/google/chromeos/chromeos.h>
 #else
 #define recovery_mode_enabled(x) 0
 #endif
 
-/*
- * MRC scrambler seed offsets should be reserved in
- * mainboard cmos.layout and not covered by checksum.
- */
-#if CONFIG_USE_OPTION_TABLE
-#include "option_table.h"
-#define CMOS_OFFSET_MRC_SEED     (CMOS_VSTART_mrc_scrambler_seed >> 3)
-#define CMOS_OFFSET_MRC_SEED_S3  (CMOS_VSTART_mrc_scrambler_seed_s3 >> 3)
-#define CMOS_OFFSET_MRC_SEED_CHK (CMOS_VSTART_mrc_scrambler_seed_chk >> 3)
-#else
-#define CMOS_OFFSET_MRC_SEED     152
-#define CMOS_OFFSET_MRC_SEED_S3  156
-#define CMOS_OFFSET_MRC_SEED_CHK 160
-#endif
+/* FIXME: no ECC support.  */
+/* FIXME: no support for 3-channel chipsets.  */
+/* FIXME: no S3.  */
+/* FIXME: no timing caching.  */
 
-void save_mrc_data(struct pei_data *pei_data)
-{
-	u16 c1, c2, checksum;
+#define SANDYIVY_MAX_DIMM_SLOTS 4
 
-#if CONFIG_EARLY_CBMEM_INIT
-	struct mrc_data_container *mrcdata;
-	int output_len = ALIGN(pei_data->mrc_output_len, 16);
+#define BASEFREQ 133	/* MHz per FRQ step: the get_*() helpers compute FRQ = 256000 / (tCK * BASEFREQ) */
+#define tDLLK 512	/* written into the SRFTP register (0x42a4 / 0x46a4) by dram_timing_regs() */
 
-	/* Save the MRC S3 restore data to cbmem */
-	mrcdata = cbmem_add
-		(CBMEM_ID_MRCDATA,
-		 output_len + sizeof(struct mrc_data_container));
+#define IS_SANDY_CPU(x) (((x) & 0xffff0) == 0x206a0)	/* x: value compared against signature 0x206a0; argument parenthesized so any expression expands safely */
+#define IS_SANDY_CPU_C(x) (((x) & 0xf) == 4)	/* low nibble selects the stepping */
+#define IS_SANDY_CPU_D0(x) (((x) & 0xf) == 5)
+#define IS_SANDY_CPU_D1(x) (((x) & 0xf) == 6)
+#define IS_SANDY_CPU_D2(x) (((x) & 0xf) == 7)
 
-	printk(BIOS_DEBUG, "Relocate MRC DATA from %p to %p (%u bytes)\n",
-	       pei_data->mrc_output, mrcdata, output_len);
+#define IS_IVY_CPU(x) (((x) & 0xffff0) == 0x306a0)	/* x: value compared against signature 0x306a0; argument parenthesized so any expression expands safely */
+#define IS_IVY_CPU_C(x) (((x) & 0xf) == 4)	/* low nibble selects the stepping */
+#define IS_IVY_CPU_K(x) (((x) & 0xf) == 5)
+#define IS_IVY_CPU_D(x) (((x) & 0xf) == 6)
+#define IS_IVY_CPU_E(x) (((x) & 0xf) >= 8)
 
-	mrcdata->mrc_signature = MRC_DATA_SIGNATURE;
-	mrcdata->mrc_data_size = output_len;
-	mrcdata->reserved = 0;
-	memcpy(mrcdata->mrc_data, pei_data->mrc_output,
-	       pei_data->mrc_output_len);
+#define NUM_CHANNELS 2
+#define NUM_SLOTRANKS 4
 
-	/* Zero the unused space in aligned buffer. */
-	if (output_len > pei_data->mrc_output_len)
-		memset(mrcdata->mrc_data+pei_data->mrc_output_len, 0,
-		       output_len - pei_data->mrc_output_len);
+typedef struct odtmap_st {	/* NOTE(review): presumably one pair of ODT settings; no user is visible in this part of the file — confirm */
+	u16 rttwr;
+	u16 rttnom;
+} odtmap;
 
-	mrcdata->mrc_checksum = compute_ip_checksum(mrcdata->mrc_data,
-						    mrcdata->mrc_data_size);
-#endif
+typedef struct dimm_info_st	/* Decoded SPD attributes for every DIMM slot */
+{
+        dimm_attr dimm[SANDYIVY_MAX_DIMM_SLOTS];	/* filled by spd_decode_ddr3(); index = 2 * channel + slot */
+} dimm_info;
 
-	/* Save the MRC seed values to CMOS */
-	cmos_write32(CMOS_OFFSET_MRC_SEED, pei_data->scrambler_seed);
-	printk(BIOS_DEBUG, "Save scrambler seed    0x%08x to CMOS 0x%02x\n",
-	       pei_data->scrambler_seed, CMOS_OFFSET_MRC_SEED);
+struct ram_rank_timings
+{
+  /* Register 4024. One byte per slotrank.  */
+  u8 val_4024;
+  /* Register 4028. One nibble per slotrank.  */
+  u8 val_4028;
 
-	cmos_write32(CMOS_OFFSET_MRC_SEED_S3, pei_data->scrambler_seed_s3);
-	printk(BIOS_DEBUG, "Save s3 scrambler seed 0x%08x to CMOS 0x%02x\n",
-	       pei_data->scrambler_seed_s3, CMOS_OFFSET_MRC_SEED_S3);
+  int val_320c;
 
-	/* Save a simple checksum of the seed values */
-	c1 = compute_ip_checksum((u8*)&pei_data->scrambler_seed,
-				 sizeof(u32));
-	c2 = compute_ip_checksum((u8*)&pei_data->scrambler_seed_s3,
-				 sizeof(u32));
-	checksum = add_ip_checksums(sizeof(u32), c1, c2);
+  struct ram_lane_timings
+  {
+    /* lane register offset 0x10.  */
+    u16 timA; /* bits 0 - 5, bits 16 - 18 */
+    u8 rising;			/* bits 8 - 14 */
+    u8 falling;			/* bits 20 - 26.  */
 
-	cmos_write(checksum & 0xff, CMOS_OFFSET_MRC_SEED_CHK);
-	cmos_write((checksum >> 8) & 0xff, CMOS_OFFSET_MRC_SEED_CHK+1);
-}
+    /* lane register offset 0x20.  */
+    int timC;			/* bit 0 - 5, 19.  */
+    u16 timB;			/* bits 8 - 13, 15 - 17.  */
+  } lanes[8];
+};
 
-static void prepare_mrc_cache(struct pei_data *pei_data)
-{
-	struct mrc_data_container *mrc_cache;
-	u16 c1, c2, checksum, seed_checksum;
+typedef struct ramctr_timing_st {
+	int mobile;
 
-	// preset just in case there is an error
-	pei_data->mrc_input = NULL;
-	pei_data->mrc_input_len = 0;
+        enum spd_memory_type dram_type;
+        u16 cas_supported;
+        /* tLatencies are in units of ns, scaled by x256 */
+        u32 tCK;
+        u32 tAA;
+        u32 tWR;
+        u32 tRCD;
+        u32 tRRD;
+        u32 tRP;
+        u32 tRAS;
+        u32 tRC;
+        u32 tRFC;
+        u32 tWTR;
+        u32 tRTP;
+        u32 tFAW;
+        /* Latencies in terms of clock cycles
+         * They are saved separately as they are needed for DRAM MRS commands*/
+        u8 CAS; /* CAS read latency */
+        u8 CWL; /* CAS write latency */
+        /* Number of dimms currently connected */
+        u8 n_dimms;
 
-	/* Read scrambler seeds from CMOS */
-	pei_data->scrambler_seed = cmos_read32(CMOS_OFFSET_MRC_SEED);
-	printk(BIOS_DEBUG, "Read scrambler seed    0x%08x from CMOS 0x%02x\n",
-	       pei_data->scrambler_seed, CMOS_OFFSET_MRC_SEED);
+	u32 tREFI;
+	u32 tMOD;
+	u32 tXSOffset;
+	u32 tWLO;
+	u32 tCKE;
+	u32 tXPDLL;
+	u32 tXP;
+	u32 tAONPD;
 
-	pei_data->scrambler_seed_s3 = cmos_read32(CMOS_OFFSET_MRC_SEED_S3);
-	printk(BIOS_DEBUG, "Read S3 scrambler seed 0x%08x from CMOS 0x%02x\n",
-	       pei_data->scrambler_seed_s3, CMOS_OFFSET_MRC_SEED_S3);
+	u32 delay1;
+	u32 delay2;
 
-	/* Compute seed checksum and compare */
-	c1 = compute_ip_checksum((u8*)&pei_data->scrambler_seed,
-				 sizeof(u32));
-	c2 = compute_ip_checksum((u8*)&pei_data->scrambler_seed_s3,
-				 sizeof(u32));
-	checksum = add_ip_checksums(sizeof(u32), c1, c2);
+	u8 eccsupport;
+	u8 dualchannel;
+	u8 thermalrefresh;
 
-	seed_checksum = cmos_read(CMOS_OFFSET_MRC_SEED_CHK);
-	seed_checksum |= cmos_read(CMOS_OFFSET_MRC_SEED_CHK+1) << 8;
+	u8 rankmap[2][2]; //channels, dimms
 
-	if (checksum != seed_checksum) {
-		printk(BIOS_ERR, "%s: invalid seed checksum\n", __func__);
-		pei_data->scrambler_seed = 0;
-		pei_data->scrambler_seed_s3 = 0;
-		return;
-	}
+	int reg_c14_offset;
 
-	if ((mrc_cache = find_current_mrc_cache()) == NULL) {
-		/* error message printed in find_current_mrc_cache */
-		return;
-	}
+	int edge_offset[3];
+	int timC_offset[3];
 
-	pei_data->mrc_input = mrc_cache->mrc_data;
-	pei_data->mrc_input_len = mrc_cache->mrc_data_size;
+	int rank_mirror[NUM_CHANNELS][NUM_SLOTRANKS];
 
-	printk(BIOS_DEBUG, "%s: at %p, size %x checksum %04x\n",
-	       __func__, pei_data->mrc_input,
-	       pei_data->mrc_input_len, mrc_cache->mrc_checksum);
-}
+	struct ram_rank_timings timings[NUM_CHANNELS][NUM_SLOTRANKS];
+} ramctr_timing;
+
+#define SOUTHBRIDGE PCI_DEV(0, 0x1f, 0)	/* PCI bus 0 devices used by the raminit code */
+#define NORTHBRIDGE PCI_DEV(0, 0x0, 0)
+#define GFX_DEV PCI_DEV(0, 0x2, 0)
+#define HECIDEV PCI_DEV(0, 0x16, 0)
+#define NUM_SLOTS 2
+#define NUM_LANES 8
+#define FOR_ALL_LANES for (lane = 0; lane < NUM_LANES; lane++)	/* the iteration macros expect a local named lane / channel / slotrank in the caller */
+#define FOR_ALL_CHANNELS for (channel = 0; channel < NUM_CHANNELS; channel++)
+#define FOR_ALL_POPULATED_RANKS for (slotrank = 0; slotrank < NUM_SLOTRANKS; slotrank++) if ((ctrl->rankmap[channel][0] | ctrl->rankmap[channel][1]) & (1 << slotrank))	/* also needs ctrl and channel in scope; ends in a bare if, so an else after the body binds to it */
+#define FOR_ALL_POPULATED_CHANNELS for (channel = 0; channel < NUM_CHANNELS; channel++) if (ctrl->rankmap[channel][0] | ctrl->rankmap[channel][1])	/* same caveats: needs ctrl in scope, ends in a bare if */
+#define max(a,b) ((a) > (b) ? (a) : (b))	/* evaluates the larger argument twice; do not pass expressions with side effects */
+#define min(a,b) ((a) < (b) ? (a) : (b))	/* evaluates the smaller argument twice */
+#define MAX_EDGE_TIMING 71
+#define MAX_TIMC 127
+#define MAX_TIMB 511
+#define MAX_TIMA 127
+
+static void
+program_timings (ramctr_timing *ctrl, int channel);
 
 static const char* ecc_decoder[] = {
 	"inactive",
@@ -161,6 +185,28 @@ static const char* ecc_decoder[] = {
 	"active"
 };
 
+static void
+wait_txt_clear (void)	/* Spin until bit 7 of the byte at 0xfed40000 is set, unless one of the checks below bails out first.  */
+{
+  struct cpuid_result cp;
+
+  cp = cpuid_ext (0x1, 0x0);
+  /* Check if TXT is supported: tests bit 6 of CPUID.1:ECX (SMX per the Intel SDM).  */
+  if (!(cp.ecx & 0x40))
+    return;
+  /* Some TXT public bit: nothing to wait for when bit 0 of 0xfed30010 is clear.  */
+  if (!(read32 (0xfed30010) & 1))
+    return;
+  /* Wait for TXT clear.  Busy-waits with no timeout.  */
+  while (!(read8 (0xfed40000) & (1 << 7)));
+}
+
+static void
+sfence (void)	/* Store fence; the "memory" clobber also keeps the compiler from moving memory accesses across it.  */
+{
+  asm volatile ("sfence" ::: "memory");
+}
+
 /*
  * Dump in the log memory controller configuration as read from the memory
  * controller registers.
@@ -204,97 +250,3415 @@ static void report_memory_config(void)
 	}
 }
 
-static void post_system_agent_init(struct pei_data *pei_data)
+static void post_system_agent_init(void)
 {
 	/* If PCIe init is skipped, set the PEG clock gating */
-	if (!pei_data->pcie_init)
-		MCHBAR32(0x7010) = MCHBAR32(0x7010) | 0x01;
+	MCHBAR32(0x7010) = MCHBAR32(0x7010) | 0x01;	/* now unconditional: the pcie_init check went away together with pei_data */
 }
 
-/**
- * Find PEI executable in coreboot filesystem and execute it.
- *
- * @param pei_data: configuration data for UEFI PEI reference code
- */
-void sdram_initialize(struct pei_data *pei_data)
+void read_spd(spd_raw_data *spd, u8 addr)	/* Fill *spd with 256 bytes read one at a time from SMBus device addr */
 {
-	struct sys_info sysinfo;
-	int (*entry) (struct pei_data *pei_data) __attribute__ ((regparm(1)));
+	int j;
+	for (j = 0; j < 256; j++)
+		(*spd)[j] = do_smbus_read_byte(SMBUS_IO_BASE, addr, j);	/* NOTE(review): the result is stored unchecked — confirm how read errors are reported */
+}
 
-	report_platform_info();
+static void dram_find_spds_ddr3(spd_raw_data *spd, dimm_info * dimm,
+				ramctr_timing * ctrl)	/* Decode the four SPD images and record rank map / mirroring per channel and slot; dies if no DDR3 DIMM is found */
+{
+	int i = 0;
+	int dimms = 0;
+	static const u8 ch[4] = { 0, 0, 1, 1 };	/* channel of SPD index i */
+	static const u8 dm[4] = { 0, 1, 0, 1 };	/* slot within that channel */
+	for (i = 0; i < 4; i++) {
+		ctrl->rankmap[ch[i]][dm[i]] = 0;
+		spd_decode_ddr3(&dimm->dimm[i], spd[i]);	/* return status is ignored; only dram_type is checked below */
+		if (dimm->dimm[i].dram_type != SPD_MEMORY_TYPE_SDRAM_DDR3) {
+			// set dimm invalid (dram_type itself is left as decoded)
+			dimm->dimm[i].ranks = 0;
+			dimm->dimm[i].size_mb = 0;
+			continue;
+		}
 
-	/* Wait for ME to be ready */
-	intel_early_me_init();
-	intel_early_me_uma_size();
+		dram_print_spd_ddr3(&dimm->dimm[i]);
+		dimms++;
+		ctrl->rank_mirror[ch[i]][dm[i] * 2] = 0;
+		ctrl->rank_mirror[ch[i]][dm[i] * 2 + 1] = spd[i][0x3f] & 1;	/* bit 0 of SPD byte 0x3f decides mirroring for the slot's second rank */
 
-	printk(BIOS_DEBUG, "Starting UEFI PEI System Agent\n");
+		/* FIXME: should merge data from all dimms. */
+		ctrl->thermalrefresh = spd[i][31];	/* last populated DIMM wins */
+		ctrl->rankmap[ch[i]][dm[i]] = (1 << dimm->dimm[i].ranks) - 1;	/* one bit per rank, low bits first */
+		printram("i=%d  rankmap[%d][%d] = %d\n", i,
+			 ch[i], dm[i],
+			 ctrl->rankmap[ch[i]][dm[i]]);
+	}
+	if (!dimms)
+		die("No DIMMs were found");
+}
+
+static void dram_find_common_params(const dimm_info * dimms, ramctr_timing * ctrl)	/* Merge per-DIMM SPD limits into one set of controller-wide parameters in ctrl */
+{
+	size_t i, valid_dimms;
+	ctrl->cas_supported = 0xff;	/* start with the low eight CAS bits set, then AND in each DIMM's mask */
+	valid_dimms = 0;
+	for (i = 0; i < 4; i++) {
+		const dimm_attr *dimm = &dimms->dimm[i];
+		if (dimm->dram_type == SPD_MEMORY_TYPE_UNDEFINED)
+			continue;
+		valid_dimms++;
+
+		if (valid_dimms == 1) {
+			/* First DIMM defines the type of DIMM */
+			ctrl->dram_type = dimm->dram_type;
+		} else {
+			/* Check if we have mismatched DIMMs */
+			if (ctrl->dram_type != dimm->dram_type)
+				die("Mismatched DIMM Types");
+		}
+		/* Find all possible CAS combinations */
+		ctrl->cas_supported &= dimm->cas_supported;
+
+		/* Find the smallest common latencies supported by all DIMMs */
+		ctrl->tCK = MAX(ctrl->tCK, dimm->tCK);	/* MAX keeps the slowest value; NOTE(review): relies on ctrl's timing fields starting at zero — confirm the caller clears ctrl */
+		ctrl->tAA = MAX(ctrl->tAA, dimm->tAA);
+		ctrl->tWR = MAX(ctrl->tWR, dimm->tWR);
+		ctrl->tRCD = MAX(ctrl->tRCD, dimm->tRCD);
+		ctrl->tRRD = MAX(ctrl->tRRD, dimm->tRRD);
+		ctrl->tRP = MAX(ctrl->tRP, dimm->tRP);
+		ctrl->tRAS = MAX(ctrl->tRAS, dimm->tRAS);
+		ctrl->tRC = MAX(ctrl->tRC, dimm->tRC);
+		ctrl->tRFC = MAX(ctrl->tRFC, dimm->tRFC);
+		ctrl->tWTR = MAX(ctrl->tWTR, dimm->tWTR);
+		ctrl->tRTP = MAX(ctrl->tRTP, dimm->tRTP);
+		ctrl->tFAW = MAX(ctrl->tFAW, dimm->tFAW);
+	}
 
-	memset(&sysinfo, 0, sizeof(sysinfo));
+	ctrl->n_dimms = valid_dimms;
+	if (!ctrl->cas_supported)
+		die("Unsupported DIMM combination. "
+		    "DIMMS do not support common CAS latency");
+	if (!valid_dimms)
+		die("No valid DIMMs found");
 
-	sysinfo.boot_path = pei_data->boot_mode;
+	ctrl->dualchannel =
+	    (pcie_read_config32(PCI_DEV(0, 0, 0), 0xE4) & 0x4000) >> 14;	/* bit 14 of host-bridge config register 0xE4 */
+	if (ctrl->dualchannel) {
+		printram("Dual channel supported\n");
+	} else {
+		printram("Dual channel not supported\n");
+	}
+}
 
-	/*
-	 * Do not pass MRC data in for recovery mode boot,
-	 * Always pass it in for S3 resume.
+static u8 get_CWL(u8 CAS)
+{
+	/* Get CWL based on CAS using the following rule:
+	 *       _________________________________________
+	 * CAS: | 4T | 5T | 6T | 7T | 8T | 9T | 10T | 11T |
+	 * CWL: | 5T | 5T | 5T | 6T | 6T | 7T |  7T |  8T |
 	 */
-	if (!recovery_mode_enabled() || pei_data->boot_mode == 2)
-		prepare_mrc_cache(pei_data);
-
-	/* If MRC data is not found we cannot continue S3 resume. */
-	if (pei_data->boot_mode == 2 && !pei_data->mrc_input) {
-		printk(BIOS_DEBUG, "Giving up in sdram_initialize: No MRC data\n");
-		outb(0x6, 0xcf9);
-		hlt();
-	}
-
-	/* Pass console handler in pei_data */
-	pei_data->tx_byte = do_putchar;
-
-	/* Locate and call UEFI System Agent binary. */
-	/* TODO make MRC blob (0xab?) defined in cbfs_core.h. */
-	entry = cbfs_get_file_content(
-		CBFS_DEFAULT_MEDIA, "mrc.bin", 0xab, NULL);
-	if (entry) {
-		int rv;
-		rv = entry (pei_data);
-		if (rv) {
-			switch (rv) {
-			case -1:
-				printk(BIOS_ERR, "PEI version mismatch.\n");
-				break;
-			case -2:
-				printk(BIOS_ERR, "Invalid memory frequency.\n");
-				break;
-			default:
-				printk(BIOS_ERR, "MRC returned %x.\n", rv);
+	static const u8 cas_cwl_map[] = { 5, 5, 5, 6, 6, 7, 7, 8 };
+	if (CAS > 11)
+		return 8;
+	return cas_cwl_map[CAS < 4 ? 0 : CAS - 4];	/* CAS below 4 is clamped to the first entry instead of indexing before the table */
+}
+
+static u32 get_REFI(u32 tCK)
+{
+	/* Get REFI based on MCU frequency using the following rule:
+	 *        _________________________________________
+	 * FRQ : | 3    | 4    | 5    | 6    | 7    | 8    |
+	 * REFI: | 3120 | 4160 | 5200 | 6240 | 7280 | 8320 |
+	 */
+	u32 FRQ = (u32) 256000 / (tCK * BASEFREQ);	/* tCK must be non-zero */
+	static const u32 frq_refi_map[] =
+	    { 3120, 4160, 5200, 6240, 7280, 8320 };
+	if (FRQ > 8)
+		return 8320;
+	return frq_refi_map[FRQ < 3 ? 0 : FRQ - 3];	/* FRQ below 3 is clamped: unsigned FRQ - 3 would wrap and index out of bounds */
+}
+
+static u8 get_XSOffset(u32 tCK)
+{
+	/* Get XSOffset based on MCU frequency using the following rule:
+	 *             _________________________
+	 * FRQ      : | 3 | 4 | 5 | 6 | 7  | 8  |
+	 * XSOffset : | 4 | 6 | 7 | 8 | 10 | 11 |
+	 */
+	u32 FRQ = (u32) 256000 / (tCK * BASEFREQ);	/* tCK must be non-zero */
+	static const u8 frq_xs_map[] = { 4, 6, 7, 8, 10, 11 };
+	if (FRQ > 8)
+		return 11;
+	return frq_xs_map[FRQ < 3 ? 0 : FRQ - 3];	/* FRQ below 3 is clamped: unsigned FRQ - 3 would wrap and index out of bounds */
+}
+
+static u8 get_MOD(u32 tCK)
+{
+	/* Get MOD based on MCU frequency using the following rule:
+	 *        _____________________________
+	 * FRQ : | 3  | 4  | 5  | 6  | 7  | 8  |
+	 * MOD : | 12 | 12 | 12 | 12 | 15 | 16 |
+	 */
+	u32 FRQ = (u32) 256000 / (tCK * BASEFREQ);	/* tCK must be non-zero */
+	static const u8 frq_mod_map[] = { 12, 12, 12, 12, 15, 16 };
+	if (FRQ > 8)
+		return 16;
+	return frq_mod_map[FRQ < 3 ? 0 : FRQ - 3];	/* FRQ below 3 is clamped: unsigned FRQ - 3 would wrap and index out of bounds */
+}
+
+static u8 get_WLO(u32 tCK)
+{
+	/* Get WLO based on MCU frequency using the following rule:
+	 *        _______________________
+	 * FRQ : | 3 | 4 | 5 | 6 | 7 | 8 |
+	 * WLO : | 4 | 5 | 6 | 6 | 8 | 8 |
+	 */
+	u32 FRQ = (u32) 256000 / (tCK * BASEFREQ);	/* tCK must be non-zero */
+	static const u8 frq_wlo_map[] = { 4, 5, 6, 6, 8, 8 };
+	if (FRQ > 8)
+		return 8;
+	return frq_wlo_map[FRQ < 3 ? 0 : FRQ - 3];	/* FRQ below 3 is clamped: unsigned FRQ - 3 would wrap and index out of bounds */
+}
+
+static u8 get_CKE(u32 tCK)
+{
+	/* Get CKE based on MCU frequency using the following rule:
+	 *        _______________________
+	 * FRQ : | 3 | 4 | 5 | 6 | 7 | 8 |
+	 * CKE : | 3 | 3 | 4 | 4 | 5 | 6 |
+	 */
+	u32 FRQ = (u32) 256000 / (tCK * BASEFREQ);	/* tCK must be non-zero */
+	static const u8 frq_cke_map[] = { 3, 3, 4, 4, 5, 6 };
+	if (FRQ > 8)
+		return 6;
+	return frq_cke_map[FRQ < 3 ? 0 : FRQ - 3];	/* FRQ below 3 is clamped: unsigned FRQ - 3 would wrap and index out of bounds */
+}
+
+static u8 get_XPDLL(u32 tCK)
+{
+	/* Get XPDLL based on MCU frequency using the following rule:
+	 *          _____________________________
+	 * FRQ   : | 3  | 4  | 5  | 6  | 7  | 8  |
+	 * XPDLL : | 10 | 13 | 16 | 20 | 23 | 26 |
+	 */
+	u32 FRQ = (u32) 256000 / (tCK * BASEFREQ);	/* tCK must be non-zero */
+	static const u8 frq_xpdll_map[] = { 10, 13, 16, 20, 23, 26 };
+	if (FRQ > 8)
+		return 26;
+	return frq_xpdll_map[FRQ < 3 ? 0 : FRQ - 3];	/* FRQ below 3 is clamped: unsigned FRQ - 3 would wrap and index out of bounds */
+}
+
+static u8 get_XP(u32 tCK)
+{
+	/* Get XP based on MCU frequency using the following rule:
+	 *        _______________________
+	 * FRQ : | 3 | 4 | 5 | 6 | 7 | 8 |
+	 * XP  : | 3 | 4 | 4 | 5 | 6 | 7 |
+	 */
+	u32 FRQ = (u32) 256000 / (tCK * BASEFREQ);	/* tCK must be non-zero */
+	static const u8 frq_xp_map[] = { 3, 4, 4, 5, 6, 7 };
+	if (FRQ > 8)
+		return 7;
+	return frq_xp_map[FRQ < 3 ? 0 : FRQ - 3];	/* FRQ below 3 is clamped: unsigned FRQ - 3 would wrap and index out of bounds */
+}
+
+static u8 get_AONPD(u32 tCK)
+{
+	/* Get AONPD based on MCU frequency using the following rule:
+	 *          ________________________
+	 * FRQ   : | 3 | 4 | 5 | 6 | 7 | 8  |
+	 * AONPD : | 4 | 5 | 6 | 8 | 8 | 10 |
+	 */
+	u32 FRQ = (u32) 256000 / (tCK * BASEFREQ);	/* tCK must be non-zero */
+	static const u8 frq_aonpd_map[] = { 4, 5, 6, 8, 8, 10 };
+	if (FRQ > 8)
+		return 10;
+	return frq_aonpd_map[FRQ < 3 ? 0 : FRQ - 3];	/* FRQ below 3 is clamped: unsigned FRQ - 3 would wrap and index out of bounds */
+}
+
+static u32 get_COMP2(u32 tCK)
+{
+	/* Get COMP2 based on MCU frequency using the following rule:
+	 *         ___________________________________________________________
+	 * FRQ  : | 3       | 4       | 5       | 6       | 7       | 8       |
+	 * COMP : | D6BEDCC | CE7C34C | CA57A4C | C6369CC | C42514C | C21410C |
+	 */
+	u32 FRQ = (u32) 256000 / (tCK * BASEFREQ);	/* tCK must be non-zero */
+	static const u32 frq_comp2_map[] = { 0xD6BEDCC, 0xCE7C34C, 0xCA57A4C,
+		0xC6369CC, 0xC42514C, 0xC21410C
+	};
+	if (FRQ > 8)
+		return 0xC21410C;	/* saturate at the FRQ = 8 entry, like the sibling helpers (was the FRQ = 3 value) */
+	return frq_comp2_map[FRQ < 3 ? 0 : FRQ - 3];	/* FRQ below 3 is clamped: unsigned FRQ - 3 would wrap and index out of bounds */
+}
+
+static void dram_timing(ramctr_timing * ctrl)	/* Snap ctrl->tCK to a supported speed grade, then convert the timing fields of ctrl to clock cycles */
+{
+	u8 val;
+	u32 val32;
+
+	/* Maximum supported DDR3 frequency is 1066MHz (DDR3 2133) so make sure
+	 * we cap it if we have faster DIMMs.
+	 * Then, align it to the closest JEDEC standard frequency */
+	if (ctrl->tCK <= TCK_1066MHZ) {
+		ctrl->tCK = TCK_1066MHZ;
+		ctrl->delay1 = 16;
+		ctrl->delay2 = 8;
+		ctrl->edge_offset[0] = 16;
+		ctrl->edge_offset[1] = 7;
+		ctrl->edge_offset[2] = 7;
+		ctrl->timC_offset[0] = 18;
+		ctrl->timC_offset[1] = 7;
+		ctrl->timC_offset[2] = 7;
+		ctrl->reg_c14_offset = 16;
+	} else if (ctrl->tCK <= TCK_933MHZ) {
+		ctrl->tCK = TCK_933MHZ;
+		ctrl->delay1 = 15;
+		ctrl->delay2 = 8;
+		ctrl->edge_offset[0] = 14;
+		ctrl->edge_offset[1] = 6;
+		ctrl->edge_offset[2] = 6;
+		ctrl->timC_offset[0] = 15;
+		ctrl->timC_offset[1] = 6;
+		ctrl->timC_offset[2] = 6;
+		ctrl->reg_c14_offset = 14;
+	} else if (ctrl->tCK <= TCK_800MHZ) {
+		ctrl->tCK = TCK_800MHZ;
+		ctrl->delay1 = 12;
+		ctrl->delay2 = 6;
+		ctrl->edge_offset[0] = 13;
+		ctrl->edge_offset[1] = 5;
+		ctrl->edge_offset[2] = 5;
+		ctrl->timC_offset[0] = 14;
+		ctrl->timC_offset[1] = 5;
+		ctrl->timC_offset[2] = 5;
+		ctrl->reg_c14_offset = 12;
+	} else if (ctrl->tCK <= TCK_666MHZ) {
+		ctrl->tCK = TCK_666MHZ;
+		ctrl->delay1 = 12;
+		ctrl->delay2 = 6;
+		ctrl->edge_offset[0] = 10;
+		ctrl->edge_offset[1] = 4;
+		ctrl->edge_offset[2] = 4;
+		ctrl->timC_offset[0] = 11;
+		ctrl->timC_offset[1] = 4;
+		ctrl->timC_offset[2] = 4;
+		ctrl->reg_c14_offset = 10;
+	} else {
+		ctrl->tCK = TCK_533MHZ;
+		ctrl->delay1 = 12;
+		ctrl->delay2 = 5;
+		ctrl->edge_offset[0] = 8;
+		ctrl->edge_offset[1] = 3;
+		ctrl->edge_offset[2] = 3;
+		ctrl->timC_offset[0] = 9;
+		ctrl->timC_offset[1] = 3;
+		ctrl->timC_offset[2] = 3;
+		ctrl->reg_c14_offset = 8;
+	}
+
+	val32 = (1000 << 8) / ctrl->tCK;
+	printram("Selected DRAM frequency: %u MHz\n", val32);
+
+	/* Find CAS and CWL latencies */
+	val = (ctrl->tAA + ctrl->tCK - 1) / ctrl->tCK;	/* round up to whole clocks */
+	printram("Minimum  CAS latency   : %uT\n", val);
+	/* Find lowest supported CAS latency that satisfies the minimum value */
+	while (!((ctrl->cas_supported >> (val - 4)) & 1)
+	       && (ctrl->cas_supported >> (val - 4))) {	/* bit 0 of cas_supported stands for CAS 4; stop once no higher bit is left */
+		val++;
+	}
+	/* Is CAS supported */
+	if (!(ctrl->cas_supported & (1 << (val - 4))))
+		printram("CAS not supported\n");	/* only logged; the value is used anyway */
+	printram("Selected CAS latency   : %uT\n", val);
+	ctrl->CAS = val;
+	ctrl->CWL = get_CWL(ctrl->CAS);
+	printram("Selected CWL latency   : %uT\n", ctrl->CWL);
+
+	/* Find tRCD */
+	ctrl->tRCD = (ctrl->tRCD + ctrl->tCK - 1) / ctrl->tCK;	/* in-place: from here on each field below holds clocks, so running this twice would divide clocks by tCK */
+	printram("Selected tRCD          : %uT\n", ctrl->tRCD);
+
+	ctrl->tRP = (ctrl->tRP + ctrl->tCK - 1) / ctrl->tCK;
+	printram("Selected tRP           : %uT\n", ctrl->tRP);
+ 
+ 	/* Find tRAS */
+	ctrl->tRAS = (ctrl->tRAS + ctrl->tCK - 1) / ctrl->tCK;
+	printram("Selected tRAS          : %uT\n", ctrl->tRAS);
+
+	/* Find tWR */
+	ctrl->tWR = (ctrl->tWR + ctrl->tCK - 1) / ctrl->tCK;
+	printram("Selected tWR           : %uT\n", ctrl->tWR);
+
+	/* Find tFAW */
+	ctrl->tFAW = (ctrl->tFAW + ctrl->tCK - 1) / ctrl->tCK;
+	printram("Selected tFAW          : %uT\n", ctrl->tFAW);
+ 
+ 	/* Find tRRD */
+	ctrl->tRRD = (ctrl->tRRD + ctrl->tCK - 1) / ctrl->tCK;
+	printram("Selected tRRD          : %uT\n", ctrl->tRRD);
+ 
+ 	/* Find tRTP */
+	ctrl->tRTP = (ctrl->tRTP + ctrl->tCK - 1) / ctrl->tCK;
+	printram("Selected tRTP          : %uT\n", ctrl->tRTP);
+ 
+ 	/* Find tWTR */
+	ctrl->tWTR = (ctrl->tWTR + ctrl->tCK - 1) / ctrl->tCK;
+	printram("Selected tWTR          : %uT\n", ctrl->tWTR);
+
+	/* Refresh-to-Active or Refresh-to-Refresh (tRFC) */
+	ctrl->tRFC = (ctrl->tRFC + ctrl->tCK - 1) / ctrl->tCK;
+	printram("Selected tRFC          : %uT\n", ctrl->tRFC);
+
+	ctrl->tRC = (ctrl->tRC + ctrl->tCK - 1) / ctrl->tCK;
+	printram("Required tRC           : %uT\n", ctrl->tRC);
+
+	ctrl->tREFI = get_REFI(ctrl->tCK);	/* the remaining values come from per-frequency lookup tables */
+	ctrl->tMOD = get_MOD(ctrl->tCK);
+	ctrl->tXSOffset = get_XSOffset(ctrl->tCK);
+	ctrl->tWLO = get_WLO(ctrl->tCK);
+	ctrl->tCKE = get_CKE(ctrl->tCK);
+	ctrl->tXPDLL = get_XPDLL(ctrl->tCK);
+	ctrl->tXP = get_XP(ctrl->tCK);
+	ctrl->tAONPD = get_AONPD(ctrl->tCK);
+}
+
+static void dram_freq(ramctr_timing * ctrl)	/* Request the ratio matching ctrl->tCK via MCHBAR 0x5e00, then compare it with the ratio read back from 0x5e04 */
+{
+	u8 val1, val2;
+	u32 reg1 = 0;
+
+	/* Step 1 - Set target PCU frequency */
+
+	if (ctrl->tCK <= TCK_1066MHZ) {
+		val1 = 0x08;
+		ctrl->tCK = TCK_1066MHZ;
+	} else if (ctrl->tCK <= TCK_933MHZ) {
+		val1 = 0x07;
+		ctrl->tCK = TCK_933MHZ;
+	} else if (ctrl->tCK <= TCK_800MHZ) {
+		val1 = 0x06;
+		ctrl->tCK = TCK_800MHZ;
+	} else if (ctrl->tCK <= TCK_666MHZ) {
+		val1 = 0x05;
+		ctrl->tCK = TCK_666MHZ;
+	} else {
+		val1 = 0x04;
+		ctrl->tCK = TCK_533MHZ;
+	}
+
+	/* Step 2 - Select frequency in the MCU */
+	reg1 = val1;
+	reg1 |= 0x80000000;	// set running bit
+	MCHBAR32(0x5e00) = reg1;
+	while (reg1 >> 0x1f) {	/* poll until bit 31 reads back as zero; no timeout */
+		printram(" PLL busy...");
+		reg1 = MCHBAR32(0x5e00);
+	}
+	printram("done\n");
+
+	/* Step 3 - Verify lock frequency */
+	reg1 = MCHBAR32(0x5e04);
+	val2 = (u8) reg1;
+	if (val2 > val1) {	/* NOTE(review): fires when the read-back ratio is HIGHER than requested, yet the message says "lower" — confirm the comparison */
+		printram("Lock frequency is lower, recalculating\n");
+		switch (val2) {
+		case 8:
+			ctrl->tCK = TCK_1066MHZ;
+			break;
+		case 7:
+			ctrl->tCK = TCK_933MHZ;
+			break;
+		case 6:
+			ctrl->tCK = TCK_800MHZ;
+			break;
+		case 5:
+			ctrl->tCK = TCK_666MHZ;
+			break;
+		case 4:
+			ctrl->tCK = TCK_533MHZ;
+			break;
+		default:
+			printram("ERROR: PLL is off or unknown\n");	/* tCK is left unchanged in this case */
+			break;
+		}
+		dram_timing(ctrl);	// recalculate timings; NOTE(review): dram_timing() converts tRCD etc. in place, so a second run works on clock counts — confirm
+	}
+	printram("MCU frequency is set at : %d MHz\n", (1000 << 8) / ctrl->tCK);
+}
+
+static void dram_xover(ramctr_timing * ctrl)	/* Write the per-channel xover clk (0xc14 / 0xd14) and ctl/cmd (0x320c / 0x330c) enables derived from the rank map */
+{
+	size_t ch;
+	u32 reg, addr;
+	u8 rmap;
+	for (ch = 0; ch < 2; ch++) {
+		// enable xover clk
+		reg = 0;
+		rmap = (ctrl->rankmap[ch][0] | (ctrl->rankmap[ch][1] << 2));	/* slot 0 ranks in bits 0-1, slot 1 ranks in bits 2-3 */
+		reg = (reg & ~0xf000000) | (rmap << 24);
+		addr = (ch == 0) ? 0xc14 : 0xd14;
+		printram ("[%x] = %x\n", addr, reg);
+		MCHBAR32(addr) = reg;
+
+		// enable xover ctl
+		reg = 0;
+		if ((ctrl->rankmap[ch][0] & 0x1) || (ctrl->rankmap[ch][1] & 0x1)) {	/* first rank of either slot present: set bit 17 */
+			reg = (reg & ~0x20000) | (1 << 0x11);
+		}
+		if ((ctrl->rankmap[ch][0] & 0x2) || (ctrl->rankmap[ch][1] & 0x2)) {	/* second rank of either slot present: set bit 26 */
+			reg = (reg & ~0x4000000) | (1 << 0x1a);
+		}
+		// enable xover cmd
+		reg = (reg & ~0x4000) | (1 << 14);
+		addr = (ch == 0) ? 0x320c : 0x330c;
+		printram ("[%x] = %x\n", addr, reg);
+		MCHBAR32(addr) = reg;
+	}
+}
+
+static void dram_timing_regs(ramctr_timing * ctrl)	/* Pack the cycle-count timings from ctrl into the per-channel MCHBAR timing registers */
+{
+	size_t ch;
+	u32 reg, addr, val32, cpu, stretch;
+	u8 val;
+	struct cpuid_result cpures;
+
+	for (ch = 0; ch < 2; ch++) {
+		// DBP
+		reg = 0;
+		val = ctrl->tRCD;
+		reg = (reg & ~0xf) | val;
+		val = ctrl->tRP;
+		reg = (reg & ~0xf0) | (val << 0x4);
+		val = ctrl->CAS;
+		reg = (reg & ~0xf00) | (val << 0x8);
+		val = ctrl->CWL;
+		reg = (reg & ~0xf000) | (val << 0xc);
+		val = ctrl->tRAS;
+		reg = (reg & ~0xff0000) | (val << 0x10);
+		addr = (ch == 0 ? 0x4000 : 0x4400);
+		printram ("[%x] = %x\n", addr, reg);
+		MCHBAR32(addr) = reg;
+
+		// RAP
+		reg = 0;
+		val = ctrl->tRRD;
+		reg = (reg & ~0xf) | val;
+		val = ctrl->tRTP;
+		reg = (reg & ~0xf0) | (val << 0x4);
+		val = ctrl->tCKE;
+		reg = (reg & ~0xf00) | (val << 0x8);
+		val = ctrl->tWTR;
+		reg = (reg & ~0xf000) | (val << 0xc);
+		val = ctrl->tFAW;
+		reg = (reg & ~0xff0000) | (val << 0x10);
+		val = ctrl->tWR;
+		reg = (reg & ~0x1f000000) | (val << 0x18);
+		reg = (reg & ~0xc0000000) | (3u << 30);	/* unsigned constant: 3 << 30 does not fit in a signed int */
+		addr = (ch == 0 ? 0x4004 : 0x4404);
+		printram ("[%x] = %x\n", addr, reg);
+		MCHBAR32(addr) = reg;
+
+		// OTHP
+		addr = (ch == 0 ? 0x400c : 0x440c);
+		reg = MCHBAR32(addr);
+		val = ctrl->tXPDLL;
+		reg = (reg & ~0x1f) | val;
+		val = ctrl->tXP;
+		reg = (reg & ~0xe0) | (val << 0x5);
+		val = ctrl->tAONPD;
+		reg = (reg & ~0xf00) | (val << 0x8);
+		printram ("[%x] = %x\n", addr, reg);
+		MCHBAR32(addr) = reg;
+
+		MCHBAR32(ch == 0 ? 0x4014 : 0x4414) = 0;
+
+		MCHBAR32(addr) |= 0x00020000;	/* addr is still the OTHP register here */
+
+
+		// ODT stretch
+		reg = 0;
+
+		cpures = cpuid(1);	/* leaf 1 EAX carries the family/model/stepping signature; leaf 0 EAX is only the highest supported leaf */
+		cpu = cpures.eax;
+		if (IS_IVY_CPU(cpu)
+		    || (IS_SANDY_CPU(cpu) && IS_SANDY_CPU_D2(cpu))) {
+			stretch = 2;
+			addr = (ch == 0 ? 0x400c : 0x440c);
+			printram ("[%x] = %x\n", addr, reg);	/* logs reg before it is read back, i.e. zero */
+			reg = MCHBAR32(addr);
+
+			if ((ctrl->rankmap[ch][0] == 0) ||
+			    ctrl->rankmap[ch][1] == 0) {	/* applied only when at least one slot of the channel is empty */
+
+				// Rank 0 - operate on rank 2
+				reg = (reg & ~0xc0000) | (stretch << 0x12);
+
+				// Rank 2 - operate on rank 0
+				reg = (reg & ~0x30000) | (stretch << 0x10);
+
+				addr = (ch == 0 ? 0x400c : 0x440c);
+				printram ("[%x] = %x\n", addr, reg);
+				MCHBAR32(addr) = reg;
+			}
+
+		} else if (IS_SANDY_CPU(cpu) && IS_SANDY_CPU_C(cpu)) {
+			stretch = 3;
+			addr = (ch == 0 ? 0x401c : 0x441c);
+			reg = MCHBAR32(addr);
+
+			if ((ctrl->rankmap[ch][0] == 0) ||
+			    ctrl->rankmap[ch][1] == 0) {
+
+				// Rank 0 - operate on rank 2
+				reg = (reg & ~0x3000) | (stretch << 0xc);
+
+				// Rank 2 - operate on rank 0
+				reg = (reg & ~0xc00) | (stretch << 0xa);
+
+				addr = (ch == 0 ? 0x401c : 0x441c);
+				printram ("[%x] = %x\n", addr, reg);
+				MCHBAR32(addr) = reg;
 			}
-			die("Nonzero MRC return value.\n");
+		} else {
+			stretch = 0;
+		}
+
+		// REFI
+		reg = 0;
+		val32 = ctrl->tREFI;
+		reg = (reg & ~0xffff) | val32;
+		val32 = ctrl->tRFC;
+		reg = (reg & ~0x1ff0000) | (val32 << 0x10);
+		val32 = (u32) (ctrl->tREFI * 9) / 1024;
+		reg = (reg & ~0xfe000000) | (val32 << 0x19);
+		addr = (ch == 0 ? 0x4298 : 0x4698);
+		printram ("[%x] = %x\n", addr, reg);
+		MCHBAR32(addr) = reg;
+
+		addr = (ch == 0 ? 0x4294 : 0x4694);
+		MCHBAR32(addr) |= 0xff;
+
+		// SRFTP
+		reg = 0;
+		val32 = tDLLK;
+		reg = (reg & ~0xfff) | val32;
+		val32 = ctrl->tXSOffset;
+		reg = (reg & ~0xf000) | (val32 << 0xc);
+		val32 = tDLLK - ctrl->tXSOffset;
+		reg = (reg & ~0x3ff0000) | (val32 << 0x10);
+		val32 = ctrl->tMOD - 8;
+		reg = (reg & ~0xf0000000) | (val32 << 0x1c);
+		addr = (ch == 0 ? 0x42a4 : 0x46a4);
+		printram ("[%x] = %x\n", addr, reg);
+		MCHBAR32(addr) = reg;
+	}
+}
+
+static void dram_dimm_mapping(dimm_info * info, ramctr_timing * ctrl)	/* Build and write the per-channel MAD-DIMM register (0x5004 + 4 * channel); the larger DIMM becomes dimmA */
+{
+	size_t ch;
+	int t;
+	u32 reg, addr, val32;
+	for (ch = 0; ch < 2; ch++) {
+		dimm_attr *dimmA = 0;
+		dimm_attr *dimmB = 0;
+		reg = 0;
+		val32 = 0;
+		addr = 0;
+		if (info->dimm[2 * ch].size_mb >=
+		    info->dimm[2 * ch + 1].size_mb) {
+			// dimm 0 is bigger, set it to dimmA
+			dimmA = &info->dimm[2 * ch];
+			dimmB = &info->dimm[2 * ch + 1];
+			reg = (reg & ~0x10000) | (0 << 0x10);
+		} else {
+			// dimm 1 is bigger, set it to dimmA
+			dimmA = &info->dimm[2 * ch + 1];
+			dimmB = &info->dimm[2 * ch];
+			reg = (reg & ~0x10000) | (1 << 0x10);
+			// swap dimm info (only rank_mirror entries 1 and 3 are exchanged here)
+			t = ctrl->rank_mirror[ch][1];
+			ctrl->rank_mirror[ch][1] = ctrl->rank_mirror[ch][3];
+			ctrl->rank_mirror[ch][3] = t;
+		}
+		// dimmA
+		if (dimmA && (dimmA->ranks > 0)) {
+			val32 = dimmA->size_mb / 256;	/* size field is in 256 MiB units */
+			reg = (reg & ~0xff) | val32;
+			val32 = dimmA->ranks - 1;
+			reg = (reg & ~0x20000) | (val32 << 0x11);
+			val32 = (dimmA->width / 8) - 1;	/* NOTE(review): a width below 8 makes this wrap to 0xffffffff and spill past bit 19 — confirm x4 parts cannot reach here */
+			reg = (reg & ~0x80000) | (val32 << 0x13);
+		}
+		// dimmB
+		if (dimmB && (dimmB->ranks > 0)) {
+			val32 = dimmB->size_mb / 256;
+			reg = (reg & ~0xff00) | (val32 << 0x8);
+			val32 = dimmB->ranks - 1;
+			reg = (reg & ~0x40000) | (val32 << 0x12);
+			val32 = (dimmB->width / 8) - 1;	/* same width caveat as for dimmA */
+			reg = (reg & ~0x100000) | (val32 << 0x14);
 		}
+		reg = (reg & ~0x200000) | (1 << 0x15);	// rank interleave
+		reg = (reg & ~0x400000) | (1 << 0x16);	// enhanced interleave
+
+		// Set MAD-DIMM register (skipped when the channel has no ranks at all)
+		addr = 0x5004 + ch * 4;
+		if ((dimmA && (dimmA->ranks > 0)) ||
+		    (dimmB && (dimmB->ranks > 0))) {
+			MCHBAR32(addr) = reg;
+		}
+	}
+}
+
+static void dram_zones(dimm_info * info, ramctr_timing * ctrl, int training)	/* Program MCHBAR 0x5000 / 0x5014 from the two channel sizes; ctrl is not used */
+{
+	u32 reg, ch0size, ch1size;
+	u8 val;
+	reg = 0;
+	val = 0;
+	if (training) {
+		ch0size = info->dimm[0].size_mb + info->dimm[1].size_mb ? 256 : 0;	/* parses as (a + b) ? 256 : 0: any populated channel counts as 256 MiB while training */
+		ch1size = info->dimm[2].size_mb + info->dimm[3].size_mb ? 256 : 0;
 	} else {
-		die("UEFI PEI System Agent not found.\n");
+		ch0size = info->dimm[0].size_mb + info->dimm[1].size_mb;
+		ch1size = info->dimm[2].size_mb + info->dimm[3].size_mb;
 	}
 
-#if CONFIG_USBDEBUG_IN_ROMSTAGE
-	/* mrc.bin reconfigures USB, so reinit it to have debug */
-	usbdebug_init();
-#endif
+	if (ch0size >= ch1size) {
+		reg = MCHBAR32(0x5000);
+		reg = (reg & ~0xf) | 4;
+		MCHBAR32(0x5000) = reg;
 
-	/* For reference print the System Agent version
-	 * after executing the UEFI PEI stage.
-	 */
-	u32 version = MCHBAR32(0x5034);
-	printk(BIOS_DEBUG, "System Agent Version %d.%d.%d Build %d\n",
-		version >> 24 , (version >> 16) & 0xff,
-		(version >> 8) & 0xff, version & 0xff);
-
-	/* Send ME init done for SandyBridge here.  This is done
-	 * inside the SystemAgent binary on IvyBridge. */
-	if (BASE_REV_SNB ==
-	    (pci_read_config16(PCI_CPU_DEVICE, PCI_DEVICE_ID) & BASE_REV_MASK))
-		intel_early_me_init_done(ME_INIT_STATUS_SUCCESS);
-	else
-		intel_early_me_status();
+		reg = MCHBAR32(0x5014);
+		val = ch1size / 256;	/* smaller channel in 256 MiB units, truncated to 8 bits */
+		reg = (reg & ~0xff000000) | val << 24;
+		reg = (reg & ~0xff0000) | (2 * val) << 16;
+		MCHBAR32(0x5014) = reg;
+	} else {
+		reg = MCHBAR32(0x5000);
+		reg = (reg & ~0xf) | 1;
+		MCHBAR32(0x5000) = reg;
+
+		reg = MCHBAR32(0x5014);
+		val = ch0size / 256;	/* smaller channel in 256 MiB units, truncated to 8 bits */
+		reg = (reg & ~0xff000000) | val << 24;
+		reg = (reg & ~0xff0000) | (2 * val) << 16;
+		MCHBAR32(0x5014) = reg;
+	}
+
+	reg = MCHBAR32(0x5000);
+	reg = (reg & ~0x18) | (1 << 4);	/* bits 4:3 of 0x5000 are set to 0b10 in both cases */
+	MCHBAR32(0x5000) = reg;
+}
+
+/* FIXME: this function is buggy.  */
+static void dram_memorymap(dimm_info * info, int me_uma_size)
+{
+	u32 reg, val, reclaim;
+	u32 tom, gfxstolen, gttsize;
+	size_t tsegsize, mmiosize, toludbase, touudbase, gfxstolenbase, gttbase,
+	    tsegbase, mestolenbase;
+	size_t tsegbasedelta, remapbase, remaplimit;
+
+	// FIXME: Make these configurable
+	gfxstolen = 128;
+	gttsize = 1;
+	mmiosize = 0x400;
+	//
+
+	tsegsize = CONFIG_SMM_TSEG_SIZE >> 20;
+
+	tom = info->dimm[0].size_mb + info->dimm[1].size_mb
+	    + info->dimm[2].size_mb + info->dimm[3].size_mb;
+
+	mestolenbase = tom - me_uma_size;
+
+	toludbase = MIN(4096 - mmiosize, tom - me_uma_size);
+	gfxstolenbase = toludbase - gfxstolen;
+	gttbase = gfxstolenbase - gttsize;
+
+	tsegbase = gttbase - tsegsize;
+
+	// Round tsegbase down to nearest address aligned to tsegsize
+	tsegbasedelta = tsegbase & (tsegsize - 1);
+	tsegbase &= ~(tsegsize - 1);
+
+	gttbase -= tsegbasedelta;
+	gfxstolenbase -= tsegbasedelta;
+	toludbase -= tsegbasedelta;
+
+	// Test if it is possible to reclaim a hole in the ram addressing
+	if (tom - me_uma_size > toludbase) {
+		// Reclaim is possible
+		reclaim = 1;
+		remapbase = MAX(4096, tom - me_uma_size);
+		remaplimit = remapbase +
+		    MIN(4096, tom - me_uma_size) - toludbase - 1;
+		touudbase = remaplimit + 1;
+	} else {
+		// Reclaim not possible
+		reclaim = 0;
+		touudbase = tom - me_uma_size;
+	}
+
+	// Update memory map in pci-e configuration space
+
+	// TOM (top of memory)
+	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xa0);
+	val = tom & 0xfff;
+	reg = (reg & ~0xfff00000) | (val << 20);
+	printram ("PCI:[%x] = %x\n", 0xa0, reg);
+	pcie_write_config32(PCI_DEV(0, 0, 0), 0xa0, reg);
+
+	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xa4);
+	val = tom & 0xfffff000;
+	reg = (reg & ~0x000fffff) | (val >> 12);
+	printram ("PCI:[%x] = %x\n", 0xa4, reg);
+	pcie_write_config32(PCI_DEV(0, 0, 0), 0xa4, reg);
+
+	// TOLUD (top of low used dram)
+	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xbc);
+	val = toludbase & 0xfff;
+	reg = (reg & ~0xfff00000) | (val << 20);
+	printram ("PCI:[%x] = %x\n", 0xbc, reg);
+	pcie_write_config32(PCI_DEV(0, 0, 0), 0xbc, reg);
+
+	// TOUUD MSB (top of upper usable dram)
+	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xa8);
+	val = touudbase & 0xfff;
+	reg = (reg & ~0xfff00000) | (val << 20);
+	printram ("PCI:[%x] = %x\n", 0xa8, reg);
+	pcie_write_config32(PCI_DEV(0, 0, 0), 0xa8, reg);
+
+	// TOUUD LSB
+	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xac);
+	val = touudbase & 0xfffff000;
+	reg = (reg & ~0x000fffff) | (val >> 12);
+	printram ("PCI:[%x] = %x\n", 0xac, reg);
+	pcie_write_config32(PCI_DEV(0, 0, 0), 0xac, reg);
+
+	if (reclaim) {
+		// REMAP BASE
+		reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x94);
+		val = remapbase & 0xfffff000;
+		reg = (reg & ~0x000fffff) | (val >> 12);
+		printram ("PCI:[%x] = %x\n", 0x94, reg);
+		pcie_write_config32(PCI_DEV(0, 0, 0), 0x94, reg);
+
+		// REMAP LIMIT
+		reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x98);
+		val = remaplimit & 0xfff;
+		reg = (reg & ~0xfff00000) | (val << 20);
+		printram ("PCI:[%x] = %x\n", 0x98, reg);
+		pcie_write_config32(PCI_DEV(0, 0, 0), 0x98, reg);
+
+		reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x9c);
+		val = remaplimit & 0xfffff000;
+		reg = (reg & ~0x000fffff) | (val >> 12);
+		printram ("PCI:[%x] = %x\n", 0x9c, reg);
+		pcie_write_config32(PCI_DEV(0, 0, 0), 0x9c, reg);
+	}
+	// TSEG
+	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xb8);
+	val = tsegbase & 0xfff;
+	reg = (reg & ~0xfff00000) | (val << 20);
+	printram ("PCI:[%x] = %x\n", 0xb8, reg);
+	pcie_write_config32(PCI_DEV(0, 0, 0), 0xb8, reg);
+
+	// GFX stolen memory
+	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xb0);
+	val = gfxstolenbase & 0xfff;
+	reg = (reg & ~0xfff00000) | (val << 20);
+	printram ("PCI:[%x] = %x\n", 0xb0, reg);
+	pcie_write_config32(PCI_DEV(0, 0, 0), 0xb0, reg);
+
+	// GTT stolen memory
+	reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xb4);
+	val = gttbase & 0xfff;
+	reg = (reg & ~0xfff00000) | (val << 20);
+	printram ("PCI:[%x] = %x\n", 0xb4, reg);
+	pcie_write_config32(PCI_DEV(0, 0, 0), 0xb4, reg);
+
+	if (me_uma_size) {
+		reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x7c);
+		val = (0x80000 - me_uma_size) & 0xfffff000;
+		reg = (reg & ~0x000fffff) | (val >> 12);
+		printram ("PCI:[%x] = %x\n", 0x7c, reg);
+		pcie_write_config32(PCI_DEV(0, 0, 0), 0x7c, reg);
+
+		// ME base
+		reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x70);
+		val = mestolenbase & 0xfff;
+		reg = (reg & ~0xfff00000) | (val << 20);
+		printram ("PCI:[%x] = %x\n", 0x70, reg);
+		pcie_write_config32(PCI_DEV(0, 0, 0), 0x70, reg);
+
+		reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x74);
+		val = mestolenbase & 0xfffff000;
+		reg = (reg & ~0x000fffff) | (val >> 12);
+		printram ("PCI:[%x] = %x\n", 0x74, reg);
+		pcie_write_config32(PCI_DEV(0, 0, 0), 0x74, reg);
+
+		// ME mask
+		reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x78);
+		val = (0x80000 - me_uma_size) & 0xfff;
+		reg = (reg & ~0xfff00000) | (val << 20);
+		reg = (reg & ~0x400) | (1 << 10);	// set lockbit on ME mem
+
+		reg = (reg & ~0x800) | (1 << 11); // set ME memory enable
+		printram ("PCI:[%x] = %x\n", 0x78, reg);
+		pcie_write_config32(PCI_DEV(0, 0, 0), 0x78, reg);
+	}
+}
+
+/*
+ * Program the IO clock/command rank masks, the per-channel timings and the
+ * compensation registers, then force a compensation cycle.
+ *
+ * ctrl: controller state; rankmap[][] and tCK are read here.
+ */
+static void dram_ioregs(ramctr_timing * ctrl)
+{
+	int channel;
+	u32 ranks[2];
+	u32 val, comp2;
+
+	/* One mask per channel: slot 0 in bits 1:0, slot 1 in bits 3:2. */
+	ranks[0] = ctrl->rankmap[0][0] | (ctrl->rankmap[0][1] << 2);
+	ranks[1] = ctrl->rankmap[1][0] | (ctrl->rankmap[1][1] << 2);
+
+	/* IO clock */
+	MCHBAR32(0xc00) = ranks[0];
+	MCHBAR32(0xd00) = ranks[1];
+
+	/* IO command */
+	MCHBAR32(0x3200) = ranks[0];
+	MCHBAR32(0x3300) = ranks[1];
+
+	/* IO control */
+	FOR_ALL_POPULATED_CHANNELS
+		program_timings(ctrl, channel);
+
+	/* Rcomp: spin until bit 16 of 0x5084 is set (no timeout). */
+	printram("RCOMP...");
+	do {
+		val = MCHBAR32(0x5084) & 0x10000;
+	} while (val == 0);
+	printram("done\n");
+
+	/* Comp2 depends only on the clock period. */
+	comp2 = get_COMP2(ctrl->tCK);
+	MCHBAR32(0x3714) = comp2;
+	printram("COMP2 done\n");
+
+	/* Comp1: set the odt, clk drive up and ctl drive up fields to 1. */
+	FOR_ALL_POPULATED_CHANNELS {
+		val = MCHBAR32(0x1810 + channel * 0x100);
+		val &= ~0xe00;		/* odt */
+		val |= 1 << 9;
+		val &= ~0xe00000;	/* clk drive up */
+		val |= 1 << 21;
+		val &= ~0x38000000;	/* ctl drive up */
+		val |= 1 << 27;
+		MCHBAR32(0x1810 + channel * 0x100) = val;
+	}
+	printram("COMP1 done\n");
+
+	printram("FORCE RCOMP and wait 20us...");
+	val = MCHBAR32(0x5f08);
+	val |= 1 << 8;
+	MCHBAR32(0x5f08) = val;
+	udelay(20);
+	printram("done\n");
+}
+
+/*
+ * Run the DIMM reset sequence: pulse the reset bit in 0x5030, enable DCLK,
+ * set the per-channel rank masks and issue a reset command.
+ *
+ * ctrl: controller state; only rankmap[][] is read here.
+ *
+ * The register write sequence is unchanged from the original; only the
+ * never-read local `rank` and dead `reg = 0` stores were removed.
+ */
+static void dram_jedecreset(ramctr_timing * ctrl)
+{
+	u32 reg, addr, rmap;
+	u8 ch, chw;
+
+	/* Wait for bit 16 of 0x5084, then for bit 2 or 4 of 0x428c. */
+	while ( !(MCHBAR32(0x5084) & 0x10000) );
+	do {
+		reg = MCHBAR32(0x428c);
+	} while ((reg & 0x14) == 0);
+
+	// Set state of memory controller
+	reg = 0x112;
+	addr = 0x5030;
+	MCHBAR32(addr) = reg;
+	MCHBAR32(0x4ea0) = 0;
+	reg |= 0x2;		//ddr reset
+	MCHBAR32(addr) = reg;
+
+	// Assert dimm reset signal
+	reg = MCHBAR32(addr);
+	reg &= ~0x2;
+	MCHBAR32(addr) = reg;
+
+	// Wait 200us
+	udelay(200);
+
+	// Deassert dimm reset signal
+	reg = MCHBAR32(addr);
+	reg |= 0x2;
+	MCHBAR32(addr) = reg;
+
+	// Wait 500us
+	udelay(500);
+
+	// Enable DCLK
+	reg = MCHBAR32(addr);
+	reg |= 0x4;
+	MCHBAR32(addr) = reg;
+
+	// XXX Wait 20ns
+	udelay(1);
+
+	for (ch = 0; ch < 2; ch++) {
+		// Set valid rank CKE
+		rmap = ctrl->rankmap[ch][0] | (ctrl->rankmap[ch][1] << 2);
+		reg = rmap;
+		addr = (ch == 0) ? 0x42a0 : 0x46a0;
+		MCHBAR32(addr) = reg;
+
+		// Wait 10ns for ranks to settle
+		//udelay(0.01);
+
+		reg = (reg & ~0xf0) | (rmap << 0x4);
+		MCHBAR32(addr) = reg;
+
+		// Write reset using a NOP
+		/*
+		 * The reset goes to channel 0 if it has any rank, otherwise to
+		 * channel 1.  This does not depend on `ch`, so the same
+		 * channel is used in both iterations.
+		 */
+		chw = (ctrl->rankmap[0][0] | ctrl->rankmap[0][1]) ? 0 : 1;
+		addr = (chw == 0) ? 0x428c : 0x468c;
+		do {
+			reg = MCHBAR32(addr);
+		} while ((reg & 0x14) == 0);
+
+		reg = 1 | (1 << 0x16);
+		addr = (chw == 0) ? 0x4284 : 0x4684;
+		MCHBAR32(addr) = reg;
+	}
+}
+
+/*
+ * Look up the ODT setting for one rank from the DIMM population.
+ *
+ * ctrl: controller state; only rankmap[][] is read.
+ * rank: row of the lookup table (0..3).
+ *
+ * The table column depends on the largest number of populated slots in a
+ * channel and on which channels have a combined rank mask of 3.  Calls
+ * die() when no slot is populated.
+ */
+static odtmap get_ODT(ramctr_timing * ctrl, u8 rank)
+{
+	static const odtmap odt_map[4][6] = {
+		{{60, 60}, {60, 60}, {120, 30}, {120, 30}, {120, 30},
+		 {120, 30}},
+		{{0, 0}, {60, 60}, {0, 0}, {0, 0}, {120, 30}, {120, 30}},
+		{{60, 60}, {60, 60}, {120, 30}, {120, 30}, {120, 30},
+		 {120, 30}},
+		{{0, 0}, {60, 60}, {0, 0}, {120, 30}, {0, 0}, {120, 30}}
+	};
+	/* Combined rank mask of both slots, per channel. */
+	const int ch0_ranks = ctrl->rankmap[0][0] | ctrl->rankmap[0][1];
+	const int ch1_ranks = ctrl->rankmap[1][0] | ctrl->rankmap[1][1];
+	/* Number of populated slots, per channel. */
+	const int ch0_dimms = !!ctrl->rankmap[0][0] + !!ctrl->rankmap[0][1];
+	const int ch1_dimms = !!ctrl->rankmap[1][0] + !!ctrl->rankmap[1][1];
+	const u8 dimms_per_ch = (ch0_dimms > ch1_dimms) ? ch0_dimms : ch1_dimms;
+	u8 table = 0;
+
+	switch (dimms_per_ch) {
+	case 1:
+		table = ((ch0_ranks | ch1_ranks) != 3) ? 0 : 1;
+		break;
+	case 2:
+		/* Columns 2..5: +2 if channel 0 mask is 3, +1 if channel 1's is. */
+		table = 2 + 2 * (ch0_ranks == 3) + (ch1_ranks == 3);
+		break;
+	default:
+		printram
+		    ("Huh, no dimms? m00 = %d m01 = %d m10 = %d m11 = %d dpc = %d\n",
+		     ctrl->rankmap[0][0], ctrl->rankmap[0][1],
+		     ctrl->rankmap[1][0], ctrl->rankmap[1][1], dimms_per_ch);
+		die("");
+	}
+
+	return odt_map[rank][table];
+}
+
+/*
+ * Write a mode register of one rank.
+ *
+ * ctrl:     controller state; rank_mirror[][] and delay1 are read.
+ * channel:  channel index, selects the register bank at +0x400 * channel.
+ * slotrank: rank index, placed in bits 24 and up of the command word.
+ * reg:      mode register number, placed in bits 20 and up.
+ * val:      mode register value.
+ *
+ * For mirrored ranks, bits 0/1 of reg and the bit pairs 3/4, 5/6 and 7/8 of
+ * val are swapped before the command is built.
+ */
+static void
+write_mrreg (ramctr_timing * ctrl, int channel, int slotrank, int reg, u32 val)
+{
+	const int ch_off = 0x400 * channel;
+	u32 cmd;
+
+	/* Spin until bit 4 or bit 6 of the status register is set. */
+	while ((MCHBAR32(0x428c + 0x400 * channel) & 0x50) == 0)
+		;
+
+	printk(BIOS_ERR, "MRd: %x <= %x\n", reg, val);
+
+	if (ctrl->rank_mirror[channel][slotrank]) {
+		reg = ((reg >> 1) & 1) | ((reg << 1) & 2);
+		val = (val & ~0x1f8) | ((val >> 1) & 0xa8) | ((val & 0xa8) << 1);
+	}
+
+	printk(BIOS_ERR, "MRd: %x <= %x\n", reg, val);
+
+	/* The same command word is queued in three consecutive entries. */
+	cmd = (slotrank << 24) | (reg << 20) | val | 0x60000;
+
+	write32(DEFAULT_MCHBAR + 0x4220 + ch_off, 0x0f000);
+	write32(DEFAULT_MCHBAR + 0x4230 + ch_off, 0x41001);
+	write32(DEFAULT_MCHBAR + 0x4200 + ch_off, cmd);
+	write32(DEFAULT_MCHBAR + 0x4210 + ch_off, 0);
+
+	write32(DEFAULT_MCHBAR + 0x4224 + ch_off, 0x1f000);
+	write32(DEFAULT_MCHBAR + 0x4234 + ch_off, 0x41001);
+	write32(DEFAULT_MCHBAR + 0x4204 + ch_off, cmd);
+	write32(DEFAULT_MCHBAR + 0x4214 + ch_off, 0);
+
+	write32(DEFAULT_MCHBAR + 0x4228 + ch_off, 0x0f000);
+	write32(DEFAULT_MCHBAR + 0x4238 + ch_off, 0x1001 | (ctrl->delay1 << 16));
+	write32(DEFAULT_MCHBAR + 0x4208 + ch_off, cmd);
+	write32(DEFAULT_MCHBAR + 0x4218 + ch_off, 0);
+
+	/* NOTE(review): presumably starts the queued sequence -- confirm. */
+	write32(DEFAULT_MCHBAR + 0x4284 + ch_off, 0x80001);
+}
+
+/*
+ * Build the MR0 value from the CAS latency, tWR and the mobile flag.
+ *
+ * ctrl: controller state; CAS, tWR and mobile are read.
+ * rank: unused, kept for symmetry with the other MR helpers.
+ *
+ * Returns the MR0 value with bit 8 set, the CAS latency in bits 6:4 and
+ * bit 2, the write recovery code in bits 11:9 and bit 12 set on non-mobile
+ * parts.
+ *
+ * NOTE: tWR must be in the range 5..16; it indexes mch_wr_t unchecked.
+ */
+static u32 make_mr0(ramctr_timing *ctrl, u8 rank)
+{
+	u16 mr0reg, mch_cas, mch_wr;
+	static const u8 mch_wr_t[12] = { 1, 2, 3, 4, 0, 5, 0, 6, 0, 7, 0, 0 };
+	mr0reg = 0x100;
+
+	// Convert CAS to MCH register friendly
+	/* Bit 0 of mch_cas is the extension bit, used for CAS >= 12. */
+	if (ctrl->CAS < 12) {
+		mch_cas = (u16) ((ctrl->CAS - 4) << 1);
+	} else {
+		mch_cas = (u16) (ctrl->CAS - 12);
+		mch_cas = ((mch_cas << 1) | 0x1);
+	}
+
+	// Convert tWR to MCH register friendly
+	mch_wr = mch_wr_t[ctrl->tWR - 5];
+
+	/*
+	 * The low CAS bit belongs in bit 2.  It used to be ORed in unshifted,
+	 * which set bit 0 instead and left bit 2 clear for CAS >= 12.
+	 */
+	mr0reg = (mr0reg & ~0x4) | ((mch_cas & 0x1) << 2);
+	mr0reg = (mr0reg & ~0x70) | ((mch_cas & 0xe) << 3);
+	mr0reg = (mr0reg & ~0xe00) | (mch_wr << 9);
+	// Fast (desktop) 0x1 or slow (mobile) 0x0
+	mr0reg = (mr0reg & ~0x1000) | (!ctrl->mobile << 12);
+	return mr0reg;
+}
+
+/* Write MR0 of the given rank on every populated channel. */
+static void dram_mr0(ramctr_timing * ctrl, u8 rank)
+{
+	int channel;
+
+	FOR_ALL_POPULATED_CHANNELS {
+		write_mrreg(ctrl, channel, rank, 0, make_mr0(ctrl, rank));
+	}
+}
+
+/*
+ * Translate a termination value (30, 60 or 120) into its mode register
+ * bits.  Any other value, including 0, yields 0.
+ */
+static u32 encode_odt(u32 odt)
+{
+	if (odt == 30)
+		return (1 << 9) | (1 << 2);	/* RZQ/8: bits 9 and 2 */
+	if (odt == 60)
+		return (1 << 2);		/* RZQ/4 */
+	if (odt == 120)
+		return (1 << 6);		/* RZQ/2 */
+
+	return 0;
+}
+
+/*
+ * Build the MR1 value for a rank: bit 1 is always set, and the nominal
+ * termination found by get_ODT() is added in encoded form.
+ */
+static u32 make_mr1(ramctr_timing * ctrl, u8 rank)
+{
+	const odtmap odt = get_ODT(ctrl, rank);
+
+	return 0x2 | encode_odt(odt.rttnom);
+}
+
+/*
+ * Write MR1 of the given rank on channels 0 and 1.
+ * NOTE(review): unlike dram_mr0() this does not skip unpopulated channels.
+ */
+static void dram_mr1(ramctr_timing * ctrl, u8 rank)
+{
+	const u16 mr1reg = make_mr1(ctrl, rank);
+	u8 ch = 0;
+
+	while (ch < 2) {
+		write_mrreg(ctrl, ch, rank, 1, mr1reg);
+		ch++;
+	}
+}
+
+/*
+ * Build MR2 for the given rank and write it on channels 0 and 1.
+ *
+ * Layout used here: pasr in bits 2:0, CWL - 5 in bits 5:3, asr in bit 6,
+ * str in bit 7 and rttwr / 60 from bit 9 upwards.
+ */
+static void dram_mr2(ramctr_timing * ctrl, u8 rank)
+{
+	const u16 pasr = 0;
+	const u16 cwl = ctrl->CWL - 5;
+	/* asr is 1 only when bits 0 and 2 of thermalrefresh are both set. */
+	const u16 asr =
+	    (ctrl->thermalrefresh & 0x1) & ((ctrl->thermalrefresh & 0x4) >> 2);
+	/* FIXME: compute STR.  */
+	const u16 str = 1;
+	const odtmap odt = get_ODT(ctrl, rank);
+	u16 mr2reg = 0;
+	u8 ch;
+
+	/* Each field is cleared before it is inserted, in this order. */
+	mr2reg &= ~0x7;
+	mr2reg |= pasr;
+	mr2reg &= ~0x38;
+	mr2reg |= cwl << 3;
+	mr2reg &= ~0x40;
+	mr2reg |= asr << 6;
+	mr2reg &= ~0x80;
+	mr2reg |= str << 7;
+	mr2reg |= (odt.rttwr / 60) << 9;
+
+	ch = 0;
+	while (ch < 2) {
+		write_mrreg(ctrl, ch, rank, 2, mr2reg);
+		ch++;
+	}
+}
+
+/* Write 0 to MR3 of the given rank on channels 0 and 1. */
+static void dram_mr3(ramctr_timing * ctrl, u8 rank)
+{
+	const u16 mr3reg = 0;
+
+	write_mrreg(ctrl, 0, rank, 3, mr3reg);
+	write_mrreg(ctrl, 1, rank, 3, mr3reg);
+}
+
+/*
+ * Send the mode register writes to ranks 0..3 (MR2, MR3, MR1, MR0, in that
+ * order), run one more command sequence through the 0x4exx registers, then
+ * set the "refresh enable" bit and issue a final command on every populated
+ * channel.
+ *
+ * ctrl: controller state; rankmap[][] is read here directly, the rest is
+ *       used by the dram_mrN() helpers.
+ *
+ * NOTE(review): the MR loop covers all four rank numbers, not only the
+ * populated ones.
+ */
+static void dram_mrscommands(ramctr_timing * ctrl)
+{
+	u8 rank, ch;
+	u32 reg, addr;
+
+	for (rank = 0; rank < 4; rank++) {
+		// MR2
+		printram("MR2 rank %d...", rank);
+		dram_mr2(ctrl, rank);
+		printram("done\n");
+
+		// MR3
+		printram("MR3 rank %d...", rank);
+		dram_mr3(ctrl, rank);
+		printram("done\n");
+
+		// MR1
+		printram("MR1 rank %d...", rank);
+		dram_mr1(ctrl, rank);
+		printram("done\n");
+
+		// MR0
+		printram("MR0 rank %d...", rank);
+		dram_mr0(ctrl, rank);
+		printram("done\n");
+	}
+
+	/*
+	 * Two queued entries in the 0x4exx register bank, then a write to
+	 * 0x4e84.  NOTE(review): 0x4exx presumably addresses both channels at
+	 * once -- confirm.
+	 */
+	write32 (DEFAULT_MCHBAR + 0x4e20, 0x7);
+	write32 (DEFAULT_MCHBAR + 0x4e30, 0xf1001);
+	write32 (DEFAULT_MCHBAR + 0x4e00, 0x60002);
+	write32 (DEFAULT_MCHBAR + 0x4e10, 0);
+	write32 (DEFAULT_MCHBAR + 0x4e24, 0x1f003);
+	write32 (DEFAULT_MCHBAR + 0x4e34, 0x1901001);
+	write32 (DEFAULT_MCHBAR + 0x4e04, 0x60400);
+	write32 (DEFAULT_MCHBAR + 0x4e14, 0x288);
+	write32 (DEFAULT_MCHBAR + 0x4e84, 0x40004);
+
+	// Drain
+	for (ch = 0; ch < 2; ch++) {
+		// Wait for ref drained
+		addr = (ch == 0) ? 0x428c : 0x468c;
+		do {
+			reg = MCHBAR32(addr);
+		} while ((reg & 0x50) == 0);
+	}
+
+	// Refresh enable
+	reg = MCHBAR32(0x5030);
+	reg = (reg & ~0x8) | (1 << 0x3);
+	MCHBAR32(0x5030) = reg;
+
+	for (ch = 0; ch < 2; ch++) {
+		/* Only channels with at least one rank are handled. */
+		if ((ctrl->rankmap[ch][0] | ctrl->rankmap[ch][1]) != 0) {
+			/* Clear bit 21 of the per-channel register 0x4020. */
+			addr = (ch == 0) ? 0x4020 : 0x4420;
+			reg = MCHBAR32(addr);
+			reg &= ~0x200000;
+			MCHBAR32(addr) = reg;
+
+			addr = (ch == 0) ? 0x428c : 0x468c;
+			reg = MCHBAR32(addr);
+
+			/* Bit 4 still set is reported as a failure; the
+			 * sequence continues either way. */
+			if ((reg & 0x10) == 0x10) {
+				printram("ERROR: Refresh enable failed\n");
+			} else {
+				printram("Refresh enable worked\n");
+			}
+
+			/* First rank of the first populated slot. */
+			rank = (ctrl->rankmap[ch][0] != 0) ? 0 : 2;
+
+			// Drain
+			addr = (ch == 0) ? 0x428c : 0x468c;
+			do {
+				reg = MCHBAR32(addr);
+			} while ((reg & 0x50) == 0);
+
+			write32 (DEFAULT_MCHBAR + 0x4220 + ch * 0x400, 0x0f003);
+			write32 (0x4230 + 0x400 * ch + DEFAULT_MCHBAR, 0x659001);
+			write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * ch,
+				 (rank << 24) | (6 << 16));
+			write32 (DEFAULT_MCHBAR + 0x4210 + ch * 0x400, 0x3e0);
+
+			// Drain
+			addr = (ch == 0) ? 0x428c : 0x468c;
+			do {
+				reg = MCHBAR32(addr);
+			} while ((reg & 0x50) == 0);
+		}
+	}
+}
+
+/*
+ * Busy-wait until bit 4 or bit 6 of the status register 0x428c of the given
+ * channel is set.  There is no timeout.
+ */
+static void
+wait_428c (int channel)
+{
+	while (!(read32(DEFAULT_MCHBAR | 0x428c | (channel << 10)) & 0x50))
+		;
+}
+
+/*
+ * MCHBAR offset of the register block of each lane, indexed by lane number.
+ * Callers add the channel offset (0x100 * channel) and a register offset.
+ * NOTE(review): the ninth entry (0x0800) is presumably the ECC lane --
+ * confirm.
+ */
+const u32 lane_registers[] = {0x0000, 0x0200, 0x0400, 0x0600,
+			      0x1000, 0x1200, 0x1400, 0x1600,
+			      0x0800 };
+
+/*
+ * Limit val to the range [low, up].  The lower bound is checked first, so
+ * low is returned when val is below it even if low > up.
+ */
+static int
+clamp (int val, int low, int up)
+{
+	return (val < low) ? low : ((val > up) ? up : val);
+}
+
+/*
+ * Write the timing values stored in ctrl->timings[channel] to the hardware
+ * registers of one channel: 0x320c, 0xc14/0xc18, the two per-lane registers
+ * of every rank, and 0x4024/0x4028.
+ *
+ * ctrl:    controller state; timings[][], rankmap[][] and reg_c14_offset
+ *          are read, nothing in it is modified.
+ * channel: channel to program.
+ */
+static void
+program_timings (ramctr_timing *ctrl, int channel)
+{
+	u32 reg32, reg_4024, reg_c14, reg_c18, reg_4028;
+  int lane;
+  int slotrank, slot;
+  int full_shift = 0;
+  u16 slot320c[NUM_SLOTS];
+
+  /* full_shift = largest -val_320c over the iterated ranks, at least 0, so
+   * that val_320c + full_shift is never negative for those ranks.  */
+  FOR_ALL_POPULATED_RANKS
+	  if (full_shift < -ctrl->timings[channel][slotrank].val_320c)
+		  full_shift = -ctrl->timings[channel][slotrank].val_320c;
+
+  /* One value per slot, chosen by the slot's rank mask: nothing populated
+   * -> 0x7f, one rank -> that rank's value, both ranks -> their average.  */
+  for (slot = 0; slot < NUM_SLOTS; slot++)
+	  switch (ctrl->rankmap[channel][slot])
+	  {
+	  case 0:
+	  default:
+		  slot320c[slot] = 0x7f;
+		  break;
+	  case 1:
+		  slot320c[slot] = ctrl->timings[channel][2 * slot + 0].val_320c + full_shift;
+		  break;
+	  case 2:
+		  slot320c[slot] = ctrl->timings[channel][2 * slot + 1].val_320c + full_shift;
+		  break;
+	  case 3:
+		  slot320c[slot] = (ctrl->timings[channel][2 * slot].val_320c
+				    + ctrl->timings[channel][2 * slot + 1].val_320c) / 2 + full_shift;
+		  break;
+	  }
+
+  /* Register 0x320c: slot 0 value in bits 11:6 plus bit 15, slot 1 value in
+   * bits 24:18, full_shift in bits 5:0 plus bit 12.  */
+  reg32 = (1 << 17) | (1 << 14);
+  reg32 |= ((slot320c[0] & 0x3f) << 6) | ((slot320c[0] & 0x40) << 9);
+  reg32 |= (slot320c[1] & 0x7f) << 18;
+  reg32 |= (full_shift & 0x3f) | ((full_shift & 0x40) << 6);
+
+  MCHBAR32(0x320c + 0x100 * channel) = reg32;
+
+  /* 0xc14 holds the rank mask in bits 27:24 and six bits per rank below
+   * that; bit 6 of each rank's value goes to 0xc18.  */
+  reg_c14 = (ctrl->rankmap[channel][0] | (ctrl->rankmap[channel][1] << 2)) << 24;
+  reg_c18 = 0;
+
+  FOR_ALL_POPULATED_RANKS
+  {
+	  int shift = ctrl->timings[channel][slotrank].val_320c + full_shift;
+	  int offset_val_c14;
+	  if (shift < 0)
+		  shift = 0;
+	  offset_val_c14 = ctrl->reg_c14_offset + shift;
+	  reg_c14 |= (offset_val_c14 & 0x3f) << (6 * slotrank);
+	  reg_c18 |= ((offset_val_c14 >> 6) & 1) << slotrank;
+  }
+
+  MCHBAR32 (0xc14 + channel * 0x100) = reg_c14;
+  MCHBAR32 (0xc18 + channel * 0x100) = reg_c18;
+
+  /* Only the low 16 bits of 0x4028 are rebuilt; the upper half is kept.  */
+  reg_4028 = MCHBAR32 (0x4028 + channel * 0x400);
+  reg_4028 &= 0xffff0000;
+
+  reg_4024 = 0;
+
+  FOR_ALL_POPULATED_RANKS
+  {
+	  /* Smallest and largest value of (timA >> 6) over all lanes, without
+	   * ("pre") and with ("post") the shift added.  */
+	  int post_timA_min_high = 7, post_timA_max_high = 0;
+	  int pre_timA_min_high = 7, pre_timA_max_high = 0;
+	  int shift_402x = 0;
+	  int shift = ctrl->timings[channel][slotrank].val_320c + full_shift;
+	  
+	  if (shift < 0)
+		  shift = 0;
+
+	  FOR_ALL_LANES
+	  {
+		  if (post_timA_min_high > ((ctrl->timings[channel][slotrank].lanes[lane].timA + shift) >> 6))
+			  post_timA_min_high = ((ctrl->timings[channel][slotrank].lanes[lane].timA + shift) >> 6);
+		  if (pre_timA_min_high > (ctrl->timings[channel][slotrank].lanes[lane].timA >> 6))
+			  pre_timA_min_high = (ctrl->timings[channel][slotrank].lanes[lane].timA >> 6);
+		  if (post_timA_max_high < ((ctrl->timings[channel][slotrank].lanes[lane].timA + shift) >> 6))
+			  post_timA_max_high = ((ctrl->timings[channel][slotrank].lanes[lane].timA + shift) >> 6);
+		  if (pre_timA_max_high < (ctrl->timings[channel][slotrank].lanes[lane].timA >> 6))
+			  pre_timA_max_high = (ctrl->timings[channel][slotrank].lanes[lane].timA >> 6);
+	  }
+
+	  /* Adjust by +1 when the shift widened the spread of the high bits,
+	   * by -1 when it narrowed it.  */
+	  if (pre_timA_max_high - pre_timA_min_high < post_timA_max_high - post_timA_min_high)
+		  shift_402x = +1;
+	  else if (pre_timA_max_high - pre_timA_min_high > post_timA_max_high - post_timA_min_high)
+		  shift_402x = -1;
+
+	  /* Four bits per rank in 0x4028, eight bits per rank in 0x4024.  */
+	  reg_4028 |= (ctrl->timings[channel][slotrank].val_4028 + shift_402x - post_timA_min_high) << (4 * slotrank);
+	  reg_4024 |= (ctrl->timings[channel][slotrank].val_4024 + shift_402x) << (8 * slotrank);
+
+	  FOR_ALL_LANES
+	  {
+		  /* Lane register +0x10: timA low six bits in 5:0, rising from
+		   * bit 8, timA high bits (relative to the minimum) in 18:16,
+		   * falling from bit 20.  */
+		  MCHBAR32 (lane_registers[lane] + 0x10 + 0x100 * channel + 4 * slotrank)
+			  = (((ctrl->timings[channel][slotrank].lanes[lane].timA + shift) & 0x3f)
+			     | ((ctrl->timings[channel][slotrank].lanes[lane].rising + shift) << 8)
+			     | (((ctrl->timings[channel][slotrank].lanes[lane].timA + shift
+				  - (post_timA_min_high << 6)) & 0x1c0) << 10)
+			     | (ctrl->timings[channel][slotrank].lanes[lane].falling << 20));
+
+		  /* Lane register +0x20: timC (clamped to 0..127) in 5:0 and
+		   * bit 19, timB in 13:8 and 17:15.  */
+		  MCHBAR32 (lane_registers[lane] + 0x20 + 0x100 * channel + 4 * slotrank)
+			  = ((clamp(ctrl->timings[channel][slotrank].lanes[lane].timC + shift, 0, 127) & 0x3f)
+			     | (((ctrl->timings[channel][slotrank].lanes[lane].timB + shift) & 0x3f) << 8)
+			     | (((ctrl->timings[channel][slotrank].lanes[lane].timB + shift) & 0x1c0) << 9)
+			     | ((clamp(ctrl->timings[channel][slotrank].lanes[lane].timC + shift, 0, 127) & 0x40) << 13));
+	  }
+  }
+  MCHBAR32 (0x4024 + channel * 0x400) = reg_4024;
+  MCHBAR32 (0x4028 + channel * 0x400) = reg_4028;
+}
+
+/*
+ * Run the four-entry command sequence used to test timA on one rank and
+ * wait for it to finish.  The result is read back by does_lane_work().
+ *
+ * ctrl:     controller state; delay1 and CAS are read.
+ * channel:  channel under test.
+ * slotrank: rank under test, placed in bits 24 and up of each command.
+ */
+static void
+test_timA (ramctr_timing *ctrl, int channel, int slotrank)
+{
+	const int ch_off = 0x400 * channel;
+
+	wait_428c(channel);
+
+	/* Entry 0 */
+	write32(DEFAULT_MCHBAR + 0x4220 + ch_off, 0x1f000);
+	write32(DEFAULT_MCHBAR + 0x4230 + ch_off, 0xc01 | (ctrl->delay1 << 16));
+	write32(DEFAULT_MCHBAR + 0x4200 + ch_off, (slotrank << 24) | 0x360004);
+	write32(DEFAULT_MCHBAR + 0x4210 + ch_off, 0);
+
+	/* Entry 1 */
+	write32(DEFAULT_MCHBAR + 0x4224 + ch_off, 0x1f105);
+	write32(DEFAULT_MCHBAR + 0x4234 + ch_off, 0x4040c01);
+	write32(DEFAULT_MCHBAR + 0x4204 + ch_off, slotrank << 24);
+	write32(DEFAULT_MCHBAR + 0x4214 + ch_off, 0);
+
+	/* Entry 2 */
+	write32(DEFAULT_MCHBAR + 0x4228 + ch_off, 0x1f105);
+	write32(DEFAULT_MCHBAR + 0x4238 + ch_off, 0x100f | ((ctrl->CAS + 36) << 16));
+	write32(DEFAULT_MCHBAR + 0x4208 + ch_off, (slotrank << 24) | 0x60000);
+	write32(DEFAULT_MCHBAR + 0x4218 + ch_off, 0);
+
+	/* Entry 3 */
+	write32(DEFAULT_MCHBAR + 0x422c + ch_off, 0x1f000);
+	write32(DEFAULT_MCHBAR + 0x423c + ch_off, 0xc01 | (ctrl->delay1 << 16));
+	write32(DEFAULT_MCHBAR + 0x420c + ch_off, (slotrank << 24) | 0x360000);
+	write32(DEFAULT_MCHBAR + 0x421c + ch_off, 0);
+
+	/* NOTE(review): presumably starts the queued sequence -- confirm. */
+	write32(DEFAULT_MCHBAR + 0x4284 + ch_off, 0xc0001);
+
+	wait_428c(channel);
+}
+
+/*
+ * Read the test result bit of one lane at its current timA setting.
+ *
+ * The result is spread over two 32-bit registers at offsets +4 and +8 of
+ * the lane's register block; timA selects the register (bit 5) and the bit
+ * within it (bits 4:0).  Returns that bit (0 or 1).
+ */
+static int
+does_lane_work(ramctr_timing *ctrl, int channel, int slotrank, int lane)
+{
+	const u32 timA = ctrl->timings[channel][slotrank].lanes[lane].timA;
+	const u32 word = (timA / 32) & 1;
+	const u32 bit = timA % 32;
+	const u32 result = read32(DEFAULT_MCHBAR + lane_registers[lane] +
+				  channel * 0x100 + 4 + word * 4);
+
+	return (result >> bit) & 1;
+}
+
+/*
+ * Result of get_longest_zero_run(): the longest run of zero entries in a
+ * sequence that is treated as circular.
+ */
+struct run
+{
+	int middle;	/* index in the middle of the run */
+	int end;	/* index of the last entry of the run */
+	int start;	/* index of the first entry of the run */
+	int all;	/* 1 when no bounded run was found; then start = 0,
+			   end = sz and middle = sz / 2 */
+};
+
+/*
+ * Find the longest run of zero entries in seq[0..sz-1], treating the array
+ * as circular (a run may wrap from the end to the beginning).
+ *
+ * Returns start/middle/end indices of that run, all taken modulo sz.  When
+ * no zero run bounded by a nonzero entry exists (no zeros at all, or only
+ * zeros) the result has all = 1 and covers the whole range.
+ */
+static struct run
+get_longest_zero_run (int *seq, int sz)
+{
+	struct run ret;
+	int pos;
+	int cur_start = 0;	/* first index of the run being scanned */
+	int best_len = 0;
+	int best_start = 0;
+
+	/* Two passes over the array so that wrapping runs are seen whole. */
+	for (pos = 0; pos < 2 * sz; pos++) {
+		if (!seq[pos % sz])
+			continue;
+
+		/* A nonzero entry ends the current run. */
+		if (pos - cur_start > best_len) {
+			best_len = pos - cur_start;
+			best_start = cur_start;
+		}
+		cur_start = pos + 1;
+	}
+
+	if (best_len == 0) {
+		ret.start = 0;
+		ret.end = sz;
+		ret.middle = sz / 2;
+		ret.all = 1;
+		return ret;
+	}
+
+	ret.start = best_start % sz;
+	ret.end = (best_start + best_len - 1) % sz;
+	ret.middle = (best_start + (best_len - 1) / 2) % sz;
+	ret.all = 0;
+
+	return ret;
+}
+
+/*
+ * Coarse timA search for one rank: try all 128 timA values, then set each
+ * lane to the middle of its longest run of settings for which
+ * does_lane_work() returned 1.
+ *
+ * upperA: output, one entry per lane; receives the last index of that run,
+ *         increased by 128 when the run wraps so that it is not below the
+ *         chosen timA.
+ */
+static void
+discover_timA_coarse (ramctr_timing *ctrl, int channel, int slotrank, int *upperA)
+{
+	int statistics[NUM_LANES][128];
+	int lane;
+	int timA;
+
+	for (timA = 0; timA < 128; timA++) {
+		FOR_ALL_LANES {
+			ctrl->timings[channel][slotrank].lanes[lane].timA = timA;
+		}
+		program_timings(ctrl, channel);
+
+		test_timA(ctrl, channel, slotrank);
+
+		/* Store the inverted result so that working settings are 0. */
+		FOR_ALL_LANES {
+			statistics[lane][timA] = !does_lane_work(ctrl, channel, slotrank, lane);
+			printk(BIOS_ERR, "Astat: %d, %d, %d, %x, %x\n", channel, slotrank, lane, timA,
+			       statistics[lane][timA]);
+		}
+	}
+
+	FOR_ALL_LANES {
+		struct run rn = get_longest_zero_run(statistics[lane], 128);
+
+		ctrl->timings[channel][slotrank].lanes[lane].timA = rn.middle;
+		upperA[lane] = (rn.end < rn.middle) ? rn.end + 128 : rn.end;
+		printk(BIOS_ERR, "Aval: %d, %d, %d, %x\n", channel, slotrank, lane, ctrl->timings[channel][slotrank].lanes[lane].timA);
+		printk(BIOS_ERR, "Aend: %d, %d, %d, %x\n", channel, slotrank, lane, upperA[lane]);
+	}
+}
+
+/*
+ * Fine timA search for one rank around the upper edge found earlier.
+ *
+ * For every offset in -25..25 each lane is set to upperA[lane] + offset +
+ * 0x40 and tested 100 times.  The final timA of a lane is upperA[lane] plus
+ * the midpoint between the last offset with no success at all and the first
+ * offset with 100 successes.
+ *
+ * upperA: per-lane edge values, read only.
+ */
+static void
+discover_timA_fine (ramctr_timing *ctrl, int channel, int slotrank, int *upperA)
+{
+	int timA_delta;
+	/* Success counters, indexed by lane and by offset + 25. */
+	int statistics[NUM_LANES][51];
+	int lane, i;
+
+	memset (statistics, 0, sizeof (statistics));
+
+	for (timA_delta = -25; timA_delta <= 25; timA_delta++) {
+		FOR_ALL_LANES
+			ctrl->timings[channel][slotrank].lanes[lane].timA = upperA[lane] + timA_delta + 0x40;
+		program_timings (ctrl, channel);
+
+		for (i = 0; i < 100; i++)
+		{
+			test_timA(ctrl, channel, slotrank);
+			FOR_ALL_LANES {
+				statistics[lane][timA_delta + 25] += does_lane_work (ctrl, channel, slotrank, lane);
+			}
+		}
+		FOR_ALL_LANES {
+			printk(BIOS_ERR, "A+stat: %d, %d, %d, %d (%x), %x\n", channel, slotrank, lane, timA_delta,
+			       upperA[lane] + timA_delta + 0x40,
+			       statistics[lane][timA_delta + 25]);
+		}
+	}
+	FOR_ALL_LANES
+	{
+		int last_zero, first_all;
+
+		/* Offset just below the first one with any success; 25 when no
+		 * offset had a success.  */
+		for (last_zero = -25; last_zero <= 25; last_zero++)
+			if (statistics[lane][last_zero + 25])
+				break;
+		last_zero--;
+		/* First offset at which all 100 runs succeeded; 26 if none.  */
+		for (first_all = -25; first_all <= 25; first_all++)
+			if (statistics[lane][first_all + 25] == 100)
+				break;
+
+		printk (BIOS_ERR, "lane %d: %d, %d\n", lane, last_zero, first_all);
+
+		/* NOTE(review): the 0x40 added during the sweep is not added
+		 * here -- confirm this is intended.  */
+		ctrl->timings[channel][slotrank].lanes[lane].timA = (last_zero + first_all) / 2 + upperA[lane];
+		printk(BIOS_ERR, "Aval: %d, %d, %d, %x\n", channel, slotrank, lane, ctrl->timings[channel][slotrank].lanes[lane].timA);
+	}
+}
+
+/*
+ * Adjust val_4024/val_4028 of one rank until does_lane_work() returns 0 for
+ * every lane at the current timA settings.
+ *
+ * Each round programs the timings and runs the test.  If no lane returned 0,
+ * val_4024 is reduced by 2.  If only some did, val_4028 is increased by 2
+ * and timA and upperA of exactly those lanes are increased by 128.
+ * Calls die() when val_4024 would drop below 0 or val_4028 reaches 0x10.
+ *
+ * upperA: per-lane edge values, updated together with timA.
+ */
+static void
+discover_402x (ramctr_timing *ctrl, int channel, int slotrank, int *upperA)
+{
+	/* works[lane] is 1 when does_lane_work() returned 0 for that lane. */
+	int works[NUM_LANES];
+	int lane;
+	while (1)
+	{
+		int all_works = 1, some_works = 0;
+		program_timings (ctrl, channel);
+		test_timA(ctrl, channel, slotrank);
+		FOR_ALL_LANES
+		{
+			works[lane] = !does_lane_work(ctrl, channel, slotrank, lane);
+			if (works[lane])
+				some_works = 1;
+			else
+				all_works = 0;
+		}
+		if (all_works)
+			return;
+		if (!some_works) {
+			if (ctrl->timings[channel][slotrank].val_4024 < 2)
+				die ("402x discovery failed");
+			ctrl->timings[channel][slotrank].val_4024 -= 2;
+			printk (BIOS_ERR, "4024 -= 2;\n");
+			continue;
+		}
+		/* Mixed result: advance val_4028 and move the lanes that
+		 * already pass by 128 so they stay aligned with it. */
+		ctrl->timings[channel][slotrank].val_4028 += 2;
+		printk (BIOS_ERR, "4028 += 2;\n");
+		if (ctrl->timings[channel][slotrank].val_4028 >= 0x10)
+			die ("402x discovery failed");
+		FOR_ALL_LANES
+			if (works[lane]) {
+				ctrl->timings[channel][slotrank].lanes[lane].timA += 128;
+				upperA[lane] += 128;
+				printk (BIOS_ERR, "increment %d, %d, %d\n", channel, slotrank, lane);
+			}
+	}
+}
+
+/*
+ * Smallest and largest value of (timA >> 6) over all lanes of one rank, as
+ * collected by pre_timA_change().
+ */
+struct timA_minmax
+{
+	int timA_min_high, timA_max_high;
+};
+
+/*
+ * Record the smallest and largest value of (timA >> 6) over all lanes of
+ * one rank in *mnmx.  The search starts from min = 7 and max = 0.
+ */
+static void
+pre_timA_change (ramctr_timing *ctrl, int channel, int slotrank, struct timA_minmax *mnmx)
+{
+	int *lowest = &mnmx->timA_min_high;
+	int *highest = &mnmx->timA_max_high;
+	int lane;
+
+	*lowest = 7;
+	*highest = 0;
+
+	FOR_ALL_LANES {
+		if ((ctrl->timings[channel][slotrank].lanes[lane].timA >> 6) < *lowest)
+			*lowest = (ctrl->timings[channel][slotrank].lanes[lane].timA >> 6);
+		if ((ctrl->timings[channel][slotrank].lanes[lane].timA >> 6) > *highest)
+			*highest = (ctrl->timings[channel][slotrank].lanes[lane].timA >> 6);
+	}
+}
+
+/*
+ * Compensate val_4024/val_4028 of one rank after its timA values changed.
+ *
+ * mnmx holds the min/max collected before the change.  Both values are
+ * moved by +1 when the spread (max - min) of the timA high bits grew, by -1
+ * when it shrank, and left alone otherwise.
+ */
+static void
+post_timA_change (ramctr_timing *ctrl, int channel, int slotrank, struct timA_minmax *mnmx)
+{
+	struct timA_minmax now;
+	int spread_before, spread_after;
+	int shift_402x;
+
+	/* Get changed maxima.  */
+	pre_timA_change(ctrl, channel, slotrank, &now);
+
+	spread_before = mnmx->timA_max_high - mnmx->timA_min_high;
+	spread_after = now.timA_max_high - now.timA_min_high;
+
+	/* Sign of the change in spread: -1, 0 or +1. */
+	shift_402x = (spread_after > spread_before) - (spread_after < spread_before);
+
+	ctrl->timings[channel][slotrank].val_4028 += shift_402x;
+	ctrl->timings[channel][slotrank].val_4024 += shift_402x;
+	printk(BIOS_ERR, "4024 += %d;\n", shift_402x);
+	printk(BIOS_ERR, "4028 += %d;\n", shift_402x);
+}
+
+/*
+ * Determine timA, val_4024 and val_4028 for every populated rank.
+ *
+ * Per rank: issue a command, select the rank through 0x3400, run the coarse
+ * timA search, normalise the result, run discover_402x() and the fine timA
+ * search, and finally remove the common high part of timA.  After each step
+ * that changes timA, post_timA_change() corrects val_4024/val_4028.
+ *
+ * When all ranks are done the timings of all populated channels are
+ * programmed once more and the registers at 0x4080 + 4 * lane are cleared.
+ */
+static void
+read_training (ramctr_timing *ctrl)
+{
+  int channel, slotrank, lane;
+
+  FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
+  {
+	  u32 r32;
+	  int all_high, some_high;
+	  int upperA[NUM_LANES];
+	  struct timA_minmax mnmx;
+
+	  /* Single-entry command sequence for this rank.  */
+	  wait_428c (channel);
+	  write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x1f002);
+	  write32 (DEFAULT_MCHBAR + 0x4230 + channel * 0x400,
+		   0xc01 | (ctrl->tRP << 16));
+	  write32 (DEFAULT_MCHBAR + 0x4200 + channel * 0x400,
+		   (slotrank << 24) | 0x60400);
+	  write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0);
+	  write32 (DEFAULT_MCHBAR + 0x4284 + channel * 0x400, 1);
+
+	  /* NOTE(review): presumably enables the training mode for this
+	   * rank; it is cleared again at the end of the iteration.  */
+	  write32 (DEFAULT_MCHBAR + 0x3400, (slotrank << 2) | 0x8001);
+
+	  /* Starting values for the search.  */
+	  ctrl->timings[channel][slotrank].val_4028 = 4;
+	  ctrl->timings[channel][slotrank].val_4024 = 55;
+	  program_timings (ctrl, channel);
+      
+	  discover_timA_coarse (ctrl, channel, slotrank, upperA);
+
+	  /* If every lane ended up at 0x40 or above, pull all of them down by
+	   * 0x40 and compensate in val_4028; if only some did, raise both
+	   * val_4024 and val_4028 instead.  */
+	  all_high = 1;
+	  some_high = 0;
+	  FOR_ALL_LANES
+		  if (ctrl->timings[channel][slotrank].lanes[lane].timA >= 0x40)
+			  some_high = 1;
+		  else
+			  all_high = 0;
+	  if (all_high) {
+		  ctrl->timings[channel][slotrank].val_4028--;
+		  printk (BIOS_ERR, "4028--;\n");
+		  FOR_ALL_LANES {
+			  ctrl->timings[channel][slotrank].lanes[lane].timA -= 0x40;
+			  upperA[lane] -= 0x40;
+		  }
+	  } else if (some_high) {
+		  ctrl->timings[channel][slotrank].val_4024++;
+		  ctrl->timings[channel][slotrank].val_4028++;
+		  printk (BIOS_ERR, "4024++;\n");
+		  printk (BIOS_ERR, "4028++;\n");
+	  }
+
+	  program_timings (ctrl, channel);
+
+	  /* Each pre/post pair brackets one step that modifies timA.  */
+	  pre_timA_change (ctrl, channel, slotrank, &mnmx);
+
+	  discover_402x (ctrl, channel, slotrank, upperA);
+
+	  post_timA_change (ctrl, channel, slotrank, &mnmx);
+	  pre_timA_change (ctrl, channel, slotrank, &mnmx);
+
+	  discover_timA_fine (ctrl, channel, slotrank,upperA);
+
+	  post_timA_change (ctrl, channel, slotrank, &mnmx);
+	  pre_timA_change (ctrl, channel, slotrank, &mnmx);
+
+	  /* Remove the high part common to all lanes from timA and account
+	   * for it in val_4028.  */
+	  FOR_ALL_LANES
+		  ctrl->timings[channel][slotrank].lanes[lane].timA -= mnmx.timA_min_high * 0x40;
+	  ctrl->timings[channel][slotrank].val_4028 -= mnmx.timA_min_high;
+	  printk (BIOS_ERR, "4028 -= %d;\n", mnmx.timA_min_high);
+
+	  post_timA_change (ctrl, channel, slotrank, &mnmx);
+
+	  printk (BIOS_ERR, "4/8: %d, %d, %x, %x\n", channel, slotrank,
+		  ctrl->timings[channel][slotrank].val_4024,
+		  ctrl->timings[channel][slotrank].val_4028);
+
+	  FOR_ALL_LANES
+		  printk (BIOS_ERR, "%d, %d, %d, %x\n", channel, slotrank, lane,
+			  ctrl->timings[channel][slotrank].lanes[lane].timA);
+
+	  write32 (DEFAULT_MCHBAR + 0x3400, 0);
+
+	  /* Pulse bit 5 of 0x5030: set it, wait, then clear it.  */
+	  r32 = read32 (DEFAULT_MCHBAR + 0x5030);
+	  write32 (DEFAULT_MCHBAR + 0x5030, r32 | 0x20);
+	  udelay (1);
+
+	  write32 (DEFAULT_MCHBAR + 0x5030, r32 & ~0x20);
+
+	  udelay (1);
+  }
+
+  FOR_ALL_POPULATED_CHANNELS
+	  program_timings (ctrl, channel);
+  FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
+    FOR_ALL_LANES
+	  write32 (0x4080 + 4 * lane + 0x400 * channel + DEFAULT_MCHBAR, 0);
+}
+
+/*
+ * Run the command sequences used to test timC on one rank.
+ *
+ * The per-lane registers at 0x4340 are cleared first; the caller reads them
+ * back afterwards as the test result.  Two four-entry sequences are then
+ * queued and executed, each followed by a wait for completion.
+ *
+ * ctrl:     controller state; tFAW, tRRD, tRCD, CWL, tWTR, tRP, CAS and
+ *           tRTP are read.
+ * channel:  channel under test.
+ * slotrank: rank under test, placed in bits 24 and up of each command.
+ */
+static void
+test_timC (ramctr_timing *ctrl, int channel, int slotrank)
+{
+	int lane;
+
+	/* Clear the result register of each lane; the value read from
+	 * 0x4140 is discarded. */
+	FOR_ALL_LANES
+	{
+		write32 (DEFAULT_MCHBAR + 0x4340 + channel * 0x400 +
+			 4 * lane, 0);
+		read32 (DEFAULT_MCHBAR + 0x4140 + channel * 0x400 +
+			4 * lane);
+	}
+
+	wait_428c (channel);
+
+	/* First sequence, entries 0..3. */
+	write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x1f006);
+	write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, (max ((ctrl->tFAW >> 2) + 1, ctrl->tRRD) << 10)
+		 | 4 | (ctrl->tRCD << 16));
+
+	write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
+		 (slotrank << 24) | (6 << 16));
+
+	write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x244);
+
+	write32 (DEFAULT_MCHBAR + 0x4224 + channel * 0x400, 0x1f207);
+	write32 (DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x8041001);
+	write32 (DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, (slotrank << 24) | 8);
+	write32 (DEFAULT_MCHBAR + 0x4214 + channel * 0x400, 0x3e0);
+		
+	write32 (DEFAULT_MCHBAR + 0x4228 + channel * 0x400, 0x1f201);
+	write32 (DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x80411f4);
+	write32 (DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24));
+	write32 (DEFAULT_MCHBAR + 0x4218 + channel * 0x400, 0x242);
+
+	write32 (DEFAULT_MCHBAR + 0x422c + channel * 0x400, 0x1f207);
+	write32 (DEFAULT_MCHBAR + 0x423c + 0x400 * channel, 0x8000c01
+		 | ((ctrl->CWL + ctrl->tWTR + 5) << 16));
+	write32 (DEFAULT_MCHBAR + 0x420c + 0x400 * channel, (slotrank << 24) | 8);
+	write32 (DEFAULT_MCHBAR + 0x421c + channel * 0x400, 0x3e0);
+
+	/* NOTE(review): presumably starts the queued sequence -- confirm. */
+	write32 (DEFAULT_MCHBAR + 0x4284 + channel * 0x400, 0xc0001);
+
+	wait_428c (channel);
+
+	/* Second sequence, entries 0..3. */
+	write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x1f002);
+	write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0xc01 | (ctrl->tRP << 16));
+	write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
+		 (slotrank << 24) | 0x60400);
+	write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x240);
+
+	write32 (DEFAULT_MCHBAR + 0x4224 + channel * 0x400, 0x1f006);
+	write32 (DEFAULT_MCHBAR + 0x4234 + 0x400 * channel,
+		 (max (ctrl->tRRD, (ctrl->tFAW >> 2) + 1) << 10)
+		 | 8 | (ctrl->CAS << 16));
+
+	write32 (DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, (slotrank << 24) | 0x60000);
+
+	write32 (DEFAULT_MCHBAR + 0x4214 + channel * 0x400, 0x244);
+
+	write32 (DEFAULT_MCHBAR + 0x4228 + channel * 0x400, 0x1f105);
+	write32 (DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x40011f4 | (max (ctrl->tRTP, 8) << 16));
+	write32 (DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24));
+	write32 (DEFAULT_MCHBAR + 0x4218 + channel * 0x400, 0x242);
+
+	write32 (DEFAULT_MCHBAR + 0x422c + channel * 0x400, 0x1f002);
+	write32 (DEFAULT_MCHBAR + 0x423c + 0x400 * channel, 0xc01 | (ctrl->tRP << 16));
+	write32 (DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
+		 (slotrank << 24) | 0x60400);
+	write32 (DEFAULT_MCHBAR + 0x421c + channel * 0x400, 0x240);
+	write32 (DEFAULT_MCHBAR + 0x4284 + channel * 0x400, 0xc0001);
+	wait_428c (channel);
+}
+
+/*
+ * Find the per-lane timC value for one channel/rank.
+ *
+ * timC is swept from 0 to MAX_TIMC (the same value on all lanes); after
+ * each step test_timC () is run and the value at 0x4340 + 4 * lane of the
+ * channel is recorded.  Each lane's timC is then set to the middle of the
+ * longest run of zero readings.  Dies when get_longest_zero_run () reports
+ * rn.all (presumably "no usable window" -- confirm against its definition).
+ */
+static void
+discover_timC (ramctr_timing *ctrl, int channel, int slotrank)
+{
+	int timC;
+	int statistics[NUM_LANES][MAX_TIMC + 1];
+	int lane;
+
+	wait_428c (channel);
+
+	/* Single-command sequence in slot 0, started by the write to 0x4284.
+	   NOTE(review): the tRP delay suggests a precharge -- confirm.  */
+	write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x1f002);
+	write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0xc01 | (ctrl->tRP << 16));
+	write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
+		 (slotrank << 24) | 0x60400);
+	write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x240);
+	write32 (DEFAULT_MCHBAR + 0x4284 + channel * 0x400, 1);
+
+	for (timC = 0; timC <= MAX_TIMC; timC++) {
+		/* Apply the candidate value to every lane of this rank.  */
+		FOR_ALL_LANES
+			ctrl->timings[channel][slotrank].lanes[lane].timC = timC;
+		program_timings (ctrl, channel);
+
+		test_timC (ctrl, channel, slotrank);
+
+		/* Record the per-lane result; zero is treated as "good" below.  */
+		FOR_ALL_LANES
+		{
+			statistics[lane][timC] = read32 (DEFAULT_MCHBAR + 0x4340 + 4 * lane +
+							 channel * 0x400);
+			printk(BIOS_ERR, "Cstat: %d, %d, %d, %x, %x\n", channel, slotrank, lane, timC,
+			       statistics[lane][timC]);
+		}
+	}
+	FOR_ALL_LANES
+	{
+		struct run rn = get_longest_zero_run (statistics[lane], MAX_TIMC + 1);
+		ctrl->timings[channel][slotrank].lanes[lane].timC = rn.middle;
+		if (rn.all)
+			die ("timC discovery failed");
+		printk(BIOS_ERR, "Cval: %d, %d, %d, %x\n", channel, slotrank, lane, ctrl->timings[channel][slotrank].lanes[lane].timC);
+	}
+}
+
+/*
+ * Count the channels visited by FOR_ALL_POPULATED_CHANNELS whose index is
+ * strictly below target_channel.
+ */
+static int
+get_precedening_channels (ramctr_timing *ctrl, int target_channel)
+{
+	int channel;
+	int preceding = 0;
+
+	FOR_ALL_POPULATED_CHANNELS {
+		if (channel < target_channel)
+			preceding += 1;
+	}
+
+	return preceding;
+}
+
+/*
+ * Store sixteen 32-bit words starting at 0x04000000 plus 0x40 bytes for
+ * every channel counted by get_precedening_channels ().  Words whose index
+ * has bit 1 set receive b, the others receive a (a, a, b, b, ...).  The
+ * stores are followed by an sfence.
+ */
+static void
+fill_pattern0 (ramctr_timing *ctrl, int channel, u32 a, u32 b)
+{
+	unsigned word;
+	unsigned channel_offset = 0x40 * get_precedening_channels (ctrl, channel);
+
+	printk (BIOS_ERR, "channel_offset=%x\n", channel_offset);
+
+	for (word = 0; word < 16; word++) {
+		if (word & 2)
+			write32 (0x04000000 + channel_offset + 4 * word, b);
+		else
+			write32 (0x04000000 + channel_offset + 4 * word, a);
+	}
+
+	sfence ();
+}
+/* Return the number of channels FOR_ALL_POPULATED_CHANNELS iterates over.  */
+static int num_of_channels (const ramctr_timing *ctrl)
+{
+	int channel;
+	int count = 0;
+
+	FOR_ALL_POPULATED_CHANNELS {
+		count++;
+	}
+
+	return count;
+}
+
+/*
+ * Store sixteen words of 0xffffffff at 0x04000000 + channel offset, then
+ * sixteen words of 0 one channel step (0x40 * number of channels) further
+ * on, and finish with an sfence.  The channel offset is 0x40 bytes per
+ * channel counted by get_precedening_channels ().
+ */
+static void fill_pattern1 (ramctr_timing *ctrl, int channel)
+{
+	unsigned word;
+	unsigned channel_offset = 0x40 * get_precedening_channels (ctrl, channel);
+	unsigned channel_step = num_of_channels (ctrl) * 0x40;
+	unsigned ones_base = 0x04000000 + channel_offset;
+	unsigned zeros_base = ones_base + channel_step;
+
+	for (word = 0; word < 16; word++)
+		write32 (ones_base + word * 4, 0xffffffff);
+
+	for (word = 0; word < 16; word++)
+		write32 (zeros_base + word * 4, 0);
+
+	sfence ();
+}
+
+/*
+ * One pass of precharge () on a single channel: set the rising and falling
+ * edge timing of every lane of every populated rank to `edge', program the
+ * timings, then run the four-command sequence below once per populated
+ * rank, waiting on wait_428c () before and after each run.
+ */
+static void
+precharge_pass (ramctr_timing *ctrl, int channel, int edge)
+{
+	int slotrank, lane;
+
+	FOR_ALL_POPULATED_RANKS
+		FOR_ALL_LANES
+	{
+		ctrl->timings[channel][slotrank].lanes[lane].falling = edge;
+		ctrl->timings[channel][slotrank].lanes[lane].rising = edge;
+	}
+
+	program_timings (ctrl, channel);
+
+	FOR_ALL_POPULATED_RANKS
+	{
+		wait_428c (channel);
+
+		/* Command slot 0.  */
+		write32 (DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f000);
+		write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel,
+			 0xc01 | (ctrl->delay1 << 16));
+		write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
+			 (slotrank << 24) | 0x360004);
+		write32 (DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0);
+
+		/* Command slot 1.  */
+		write32 (DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f105);
+		write32 (DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x4041003);
+		write32 (DEFAULT_MCHBAR + 0x4204 + 0x400 * channel,
+			 (slotrank << 24));
+		write32 (DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0);
+
+		/* Command slot 2.  */
+		write32 (DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f105);
+		write32 (DEFAULT_MCHBAR + 0x4238 + 0x400 * channel,
+			 0x1001 | ((ctrl->CAS + 8) << 16));
+		write32 (DEFAULT_MCHBAR + 0x4208 + 0x400 * channel,
+			 (slotrank << 24) | 0x60000);
+		write32 (DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0);
+
+		/* Command slot 3.  */
+		write32 (DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f000);
+		write32 (DEFAULT_MCHBAR + 0x423c + 0x400 * channel,
+			 0xc01 | (ctrl->delay1 << 16));
+		write32 (DEFAULT_MCHBAR + 0x420c + 0x400 * channel,
+			 (slotrank << 24) | 0x360000);
+		write32 (DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0);
+
+		/* Start the sequence and wait for it to finish.  */
+		write32 (DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);
+
+		wait_428c (channel);
+	}
+}
+
+/*
+ * For every populated channel, run the command sequence of
+ * precharge_pass () twice: first with all edge timings set to 16, then
+ * with all edge timings set to 48.
+ */
+static void
+precharge (ramctr_timing *ctrl)
+{
+	int channel;
+
+	FOR_ALL_POPULATED_CHANNELS
+	{
+		precharge_pass (ctrl, channel, 16);
+		precharge_pass (ctrl, channel, 48);
+	}
+}
+
+/*
+ * Run one timB test on a channel/rank: write MR1 with 0x80 ORed in, run a
+ * two-command sequence, then write MR1 with 0x1080 ORed in.
+ * NOTE(review): MR1 bit 7 is presumably DDR3 write-leveling enable and
+ * bit 12 output disable -- confirm against the JEDEC DDR3 spec.
+ */
+static void
+test_timB (ramctr_timing *ctrl, int channel, int slotrank)
+{
+	write_mrreg (ctrl, channel, slotrank, 1, 0x80 | make_mr1 (ctrl, slotrank));
+
+	wait_428c (channel);
+	/* Command slot 0.  */
+	write32 (DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f207);
+	write32 (0x4230 + 0x400 * channel + DEFAULT_MCHBAR, 0x8000c01
+		 | ((ctrl->CWL + ctrl->delay2) << 16));
+	write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, 8 | (slotrank << 24)); 
+	write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0);
+
+	/* Command slot 1.  */
+	write32 (DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f107);
+	write32 (0x4234 + 0x400 * channel + DEFAULT_MCHBAR, 0x4000c01 | ((ctrl->CAS + 38) << 16));
+	write32 (DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, (slotrank << 24) | 4); 
+	write32 (DEFAULT_MCHBAR + 0x4214 + channel * 0x400, 0);
+
+	/* Start the sequence and wait for completion.  */
+	write32 (DEFAULT_MCHBAR + 0x400 * channel + 0x4284, 0x40001); 
+	wait_428c (channel);
+
+	write_mrreg (ctrl, channel, slotrank, 1, 0x1080 | make_mr1 (ctrl, slotrank));
+}
+
+/*
+ * Find the per-lane timB value for one channel/rank.
+ *
+ * timB is swept over 0..127 (same value on all lanes) and test_timB () is
+ * run at every step.  The result for a step is one bit taken from the
+ * lane's register block and stored inverted, so 0 marks a step where the
+ * bit was set.  Each lane's timB becomes the end of the longest run of
+ * zeroes.  Dies when get_longest_zero_run () reports rn.all.
+ */
+static void
+discover_timB (ramctr_timing *ctrl, int channel, int slotrank)
+{
+	int timB;
+	int statistics[NUM_LANES][128];
+	int lane;
+
+	write32 (DEFAULT_MCHBAR + 0x3400, 0x108052 | (slotrank << 2));
+
+	for (timB = 0; timB < 128; timB++) {
+		FOR_ALL_LANES
+			ctrl->timings[channel][slotrank].lanes[lane].timB = timB;
+		program_timings (ctrl, channel);
+
+		test_timB (ctrl, channel, slotrank);
+
+		FOR_ALL_LANES {
+			/* Bit (timB % 32) of the word at offset 4 or 8; the
+			   word is selected by bit 5 of timB.  */
+			statistics[lane][timB] = !((read32 (DEFAULT_MCHBAR + lane_registers[lane] +
+							    channel * 0x100 + 4 + ((timB / 32) & 1) * 4)
+						    >> (timB % 32)) & 1);
+			printk(BIOS_ERR, "Bstat: %d, %d, %d, %x, %x\n", channel, slotrank, lane, timB,
+			       statistics[lane][timB]);
+		}
+	}
+	FOR_ALL_LANES
+	{
+		struct run rn = get_longest_zero_run (statistics[lane], 128);
+		/* Unlike the other discover_* routines this takes the end of
+		   the run, not its middle.  */
+		ctrl->timings[channel][slotrank].lanes[lane].timB = rn.end;
+		if (rn.all)
+			die ("timB discovery failed");
+		printk(BIOS_ERR, "Bval: %d, %d, %d, %x\n", channel, slotrank, lane, ctrl->timings[channel][slotrank].lanes[lane].timB);
+	}
+}
+
+/*
+ * Map a 64-bit readback value to a timB adjustment step.
+ *
+ * Values at or above the three thresholds below yield -1, -2 and -3
+ * respectively.  Otherwise the value is scanned from the top: the result
+ * is the index (0..7) of the first shift amount 60, 52, ..., 4 at which
+ * bits remain set, or 8 when val is below 0x10.
+ */
+static int
+get_timB_high_adjust (u64 val)
+{
+	int shift;
+
+	if (val >= 0xfffffffffff00000LL)
+		return -1;
+	else if (val >= 0xfffffff000000000LL)
+		return -2;
+	else if (val >= 0xfff0000000000000LL)
+		return -3;
+
+	for (shift = 60; shift >= 4; shift -= 8) {
+		if ((val >> shift) != 0)
+			return (60 - shift) / 8;
+	}
+
+	return 8;
+}
+
+/*
+ * Coarse correction of the timB values found earlier.
+ *
+ * Every populated channel is filled with fill_pattern1 ().  Then, per
+ * populated rank, a four-command sequence followed by a three-command
+ * sequence is run and a 64-bit value is assembled per lane from the words
+ * at offsets 4 and 8 of the lane's register block.  timB is increased by
+ * 64 * get_timB_high_adjust (value), which may be negative.
+ */
+static void
+adjust_high_timB (ramctr_timing *ctrl)
+{
+	int channel, slotrank, lane;
+	write32 (DEFAULT_MCHBAR + 0x3400, 0x200);
+	FOR_ALL_POPULATED_CHANNELS {
+		fill_pattern1 (ctrl, channel);
+		write32 (DEFAULT_MCHBAR | 0x4288 | (channel << 10), 1);
+	}
+	FOR_ALL_POPULATED_CHANNELS
+		FOR_ALL_POPULATED_RANKS {
+
+		write32 (DEFAULT_MCHBAR + 0x4288 + channel * 0x400, 0x10001);
+
+		wait_428c (channel);
+
+		/* First sequence: four command slots, started with 0xc0001.  */
+		write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x1f006);
+		write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0xc01 | (ctrl->tRCD << 16));
+		write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, (slotrank << 24) | 0x60000);
+		write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0);
+
+		write32 (DEFAULT_MCHBAR + 0x4224 + channel * 0x400, 0x1f207);
+		write32 (DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x8040c01);
+		write32 (DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, (slotrank << 24) | 0x8);
+		write32 (DEFAULT_MCHBAR + 0x4214 + channel * 0x400, 0x3e0);
+
+		write32 (DEFAULT_MCHBAR + 0x4228 + channel * 0x400, 0x1f201);
+		write32 (DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x8041003);
+		write32 (DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24));
+		write32 (DEFAULT_MCHBAR + 0x4218 + channel * 0x400, 0x3e2);
+
+		write32 (DEFAULT_MCHBAR + 0x422c + channel * 0x400, 0x1f207);
+		write32 (DEFAULT_MCHBAR + 0x423c + 0x400 * channel, 0x8000c01 | ((ctrl->CWL + ctrl->tWTR + 5) << 16));
+		write32 (DEFAULT_MCHBAR + 0x420c + 0x400 * channel, (slotrank << 24) | 0x8);
+		write32 (DEFAULT_MCHBAR + 0x421c + channel * 0x400, 0x3e0);
+
+		write32 (DEFAULT_MCHBAR + 0x4284 + channel * 0x400, 0xc0001);
+
+		wait_428c (channel);
+
+		/* Second sequence: three command slots, started with 0x80001.  */
+		write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x1f002);
+		write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0xc01 | ((ctrl->tRP) << 16));
+		write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, (slotrank << 24) | 0x60400);
+		write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x240);
+
+		write32 (DEFAULT_MCHBAR + 0x4224 + channel * 0x400, 0x1f006);
+		write32 (DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0xc01 | ((ctrl->tRCD) << 16));
+		write32 (DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, (slotrank << 24) | 0x60000);
+		write32 (DEFAULT_MCHBAR + 0x4214 + channel * 0x400, 0);
+
+		write32 (DEFAULT_MCHBAR + 0x4228 + channel * 0x400, 0x3f105);
+		write32 (DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x4000c01 | ((ctrl->tRP
+										  + ctrl->timings[channel][slotrank].val_4024 
+										  + ctrl->timings[channel][slotrank].val_4028) << 16));
+		write32 (DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24) | 0x60008);
+		write32 (DEFAULT_MCHBAR + 0x4218 + channel * 0x400, 0);
+
+		write32 (DEFAULT_MCHBAR + 0x4284 + channel * 0x400, 0x80001);
+		wait_428c (channel);
+		FOR_ALL_LANES {
+			/* Low word from offset 4, high word from offset 8.  */
+			u64 res = read32 (DEFAULT_MCHBAR + lane_registers[lane] + 0x100 * channel + 4);
+			res |= ((u64)read32 (DEFAULT_MCHBAR + lane_registers[lane] + 0x100 * channel + 8)) << 32;
+			ctrl->timings[channel][slotrank].lanes[lane].timB += get_timB_high_adjust (res) * 64;
+			printk(BIOS_ERR, "Bval+: %d, %d, %d, %x\n", channel, slotrank, lane, ctrl->timings[channel][slotrank].lanes[lane].timB);
+		}
+	}
+	write32 (DEFAULT_MCHBAR + 0x3400, 0);
+}
+
+/*
+ * Write training for all channels and ranks.  In order, this function:
+ *  - sets bit 27 of 0x4008 on every populated channel;
+ *  - runs a one-command sequence on an existing rank and sets bit 21 of
+ *    0x4020, clears bit 3 of 0x5030, and runs the sequence once more;
+ *  - rewrites MR1 with 0x1080 ORed in, runs discover_timB () for every
+ *    rank and restores the plain make_mr1 () value;
+ *  - sets bit 3 of 0x5030 again, clears bit 21 of 0x4020, and runs
+ *    precharge ();
+ *  - fills the test pattern, runs discover_timC () for every rank, applies
+ *    adjust_high_timB () and reprograms the timings.
+ * NOTE(review): the register meanings are not visible here; the 0x0f003
+ * command looks like a ZQ calibration -- confirm.
+ */
+static void
+write_training (ramctr_timing *ctrl)
+{
+  int channel, slotrank, lane;
+  u32 r32;
+
+  FOR_ALL_POPULATED_CHANNELS
+	  write32 (DEFAULT_MCHBAR + 0x4008 + 0x400 * channel,
+		   read32 (DEFAULT_MCHBAR + 0x4008 + 0x400 * channel) | 0x8000000);
+
+  FOR_ALL_POPULATED_CHANNELS {
+	  wait_428c (channel);
+
+	  /* choose an existing rank.  */
+	  slotrank = !ctrl->rankmap[channel][0] ? 2 : 0;
+
+	  write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x0f003);
+	  write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001);
+
+	  write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, (slotrank << 24) | 0x60000);
+
+	  write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x3e0);
+
+	  write32 (DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1);
+	  wait_428c (channel);
+	  write32 (DEFAULT_MCHBAR + 0x4020 + channel * 0x400,
+		   read32 (DEFAULT_MCHBAR + 0x4020 + channel * 0x400) | 0x200000);
+  }
+  write32 (DEFAULT_MCHBAR + 0x5030,
+	   read32 (DEFAULT_MCHBAR + 0x5030) & ~8);
+  FOR_ALL_POPULATED_CHANNELS {
+	  wait_428c (channel);
+
+	  /* choose an existing rank.  */
+	  slotrank = !ctrl->rankmap[channel][0] ? 2 : 0;
+
+	  write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x0f003);
+	  write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001);
+
+	  write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, (slotrank << 24) | 0x60000);
+
+	  write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x3e0);
+
+	  write32 (DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1);
+	  wait_428c (channel);
+  }
+
+  /* MR1 with 0x1080 ORed in on every rank before the timB search.  */
+  FOR_ALL_CHANNELS
+	  FOR_ALL_POPULATED_RANKS
+		write_mrreg (ctrl, channel, slotrank, 1, make_mr1(ctrl, slotrank) | 0x1080);
+
+  write32 (DEFAULT_MCHBAR + 0x3400, 0x108052);
+
+  /* Pulse bit 5 of 0x5030: set, wait 1us, clear, wait 1us.  */
+  r32 = read32 (DEFAULT_MCHBAR + 0x5030);
+  write32 (DEFAULT_MCHBAR + 0x5030, r32 | 0x20);
+  udelay (1);
+
+  write32 (DEFAULT_MCHBAR + 0x5030, r32 & ~0x20);
+
+  udelay (1);
+
+  FOR_ALL_CHANNELS
+	  FOR_ALL_POPULATED_RANKS
+		  discover_timB(ctrl, channel, slotrank);
+
+  /* Restore the unmodified MR1 value.  */
+  FOR_ALL_CHANNELS
+	  FOR_ALL_POPULATED_RANKS
+		write_mrreg (ctrl, channel, slotrank, 1, make_mr1(ctrl, slotrank));
+
+  write32 (DEFAULT_MCHBAR + 0x3400, 0);
+
+  FOR_ALL_POPULATED_CHANNELS
+	  wait_428c (channel);
+
+  /* Undo the 0x5030 bit 3 and 0x4020 bit 21 changes made above.  */
+  write32 (DEFAULT_MCHBAR + 0x5030, read32 (DEFAULT_MCHBAR + 0x5030) | 8);
+  
+  FOR_ALL_POPULATED_CHANNELS
+  {
+	  write32 (DEFAULT_MCHBAR + 0x4020 + channel * 0x400,
+		   ~0x00200000 & read32 (DEFAULT_MCHBAR + 0x4020 + channel * 0x400));
+	  read32 (DEFAULT_MCHBAR + 0x428c + channel * 0x400);
+	  wait_428c (channel);
+
+	  /* NOTE(review): unlike the sequences above, no slotrank is ORed
+	     into the 0x4200 value here -- confirm this is intended.  */
+	  write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x0f003);
+	  write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x659001);
+	  write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, 0x60000);
+	  write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x3e0);
+
+	  write32 (DEFAULT_MCHBAR + 0x4284 + channel * 0x400, 1);
+	  wait_428c (channel);
+  }
+
+  /* Pulse bit 5 of 0x5030 again.  */
+  r32 = read32 (DEFAULT_MCHBAR + 0x5030);
+  write32 (DEFAULT_MCHBAR + 0x5030, r32 | 0x20);
+  udelay (1);
+
+  write32 (DEFAULT_MCHBAR + 0x5030, r32 & ~0x20);
+
+  udelay (1);
+
+  printk (BIOS_ERR, "CPE\n");
+  precharge (ctrl);
+  printk (BIOS_ERR, "CPF\n");
+
+  /* Read, then zero, the per-lane registers at 0x4080.  */
+  FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
+    FOR_ALL_LANES
+      {
+	read32 (0x4080 + 4 * lane + 0x400 * channel + DEFAULT_MCHBAR);
+	write32 (0x4080 + 4 * lane + 0x400 * channel + DEFAULT_MCHBAR, 0);
+      }
+
+  FOR_ALL_POPULATED_CHANNELS
+  {
+	  fill_pattern0 (ctrl, channel, 0xaaaaaaaa, 0x55555555);
+	  write32 (DEFAULT_MCHBAR | 0x4288 | (channel << 10), 0);
+  }
+
+  FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
+	  discover_timC(ctrl, channel, slotrank);
+
+  FOR_ALL_POPULATED_CHANNELS
+	  program_timings (ctrl, channel);
+
+  adjust_high_timB(ctrl);
+
+  FOR_ALL_POPULATED_CHANNELS
+	  program_timings (ctrl, channel);
+
+  /* Read, then zero, the per-lane registers at 0x4080 once more.  */
+  FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
+    FOR_ALL_LANES
+      {
+	read32 (0x4080 + 4 * lane + 0x400 * channel + DEFAULT_MCHBAR);
+	write32 (0x4080 + 4 * lane + 0x400 * channel + DEFAULT_MCHBAR, 0);
+      }
+}
+
+/*
+ * Check one channel/rank with the currently programmed timings; called by
+ * command_training () for each candidate val_320c.
+ *
+ * The lanes' timC is offset by -5..+5 around its saved value.  For each
+ * offset a four-command sequence is run and the per-lane value at 0x4340
+ * is read; a lane counts as OK once it has read back zero for any offset.
+ * The loop stops early when all lanes are OK.  The rank's timings are
+ * restored from the saved copy before returning.
+ *
+ * Returns 0 if every lane was OK, non-zero otherwise.
+ */
+static int
+test_320c (ramctr_timing *ctrl, int channel, int slotrank)
+{
+	struct ram_rank_timings saved_rt = ctrl->timings[channel][slotrank];
+	int timC_delta;
+	int lanes_ok = 0;
+	int ctr = 0;
+	int lane;
+
+	for (timC_delta = -5; timC_delta <= 5; timC_delta++) {
+		FOR_ALL_LANES
+		{
+			ctrl->timings[channel][slotrank].lanes[lane].timC = saved_rt.lanes[lane].timC + timC_delta;
+		}
+		program_timings (ctrl, channel);
+		/* NOTE(review): 0x4f40 has no per-channel offset, unlike the
+		   0x4340 read below -- presumably a broadcast alias; confirm.  */
+		FOR_ALL_LANES
+			write32 (DEFAULT_MCHBAR + 4 * lane + 0x4f40, 0);
+
+		write32 (DEFAULT_MCHBAR + 0x4288 + channel * 0x400, 0x1f);
+
+		wait_428c (channel);
+
+		write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x1f006);
+		write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, ((max (ctrl->tRRD, (ctrl->tFAW >> 2) + 1)) << 10)
+			 | 8 | (ctrl->tRCD << 16));
+
+		/* ctr is ORed into slot 0's 0x4200 value and grows by one per
+		   iteration.  */
+		write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel,
+			 (slotrank << 24) | ctr | 0x60000);
+
+		write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x244);
+
+		write32 (DEFAULT_MCHBAR + 0x4224 + channel * 0x400, 0x1f201);
+		write32 (DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x8001020 | ((ctrl->CWL + ctrl->tWTR + 8) << 16));
+		write32 (DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, (slotrank << 24));
+		write32 (DEFAULT_MCHBAR + 0x4244 + channel * 0x400, 0x389abcd);
+		write32 (DEFAULT_MCHBAR + 0x4214 + channel * 0x400, 0x20e42);
+
+		write32 (DEFAULT_MCHBAR + 0x4228 + channel * 0x400, 0x1f105);
+		write32 (DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x4001020 | (max (ctrl->tRTP, 8) << 16));
+		write32 (DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24));
+		write32 (DEFAULT_MCHBAR + 0x4248 + channel * 0x400, 0x389abcd);
+		write32 (DEFAULT_MCHBAR + 0x4218 + channel * 0x400, 0x20e42);
+
+		write32 (DEFAULT_MCHBAR + 0x422c + channel * 0x400, 0x1f002);
+		write32 (DEFAULT_MCHBAR + 0x423c + 0x400 * channel, 0xf1001);
+		write32 (DEFAULT_MCHBAR + 0x420c + 0x400 * channel, (slotrank << 24) | 0x60400);
+		write32 (DEFAULT_MCHBAR + 0x421c + channel * 0x400, 0x240);
+
+		write32 (DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);
+		wait_428c (channel);
+		FOR_ALL_LANES
+		{
+			u32 r32 = read32 (DEFAULT_MCHBAR + 0x4340 + 4 * lane + channel * 0x400);
+			
+			/* lanes_ok accumulates over all offsets tried so far.  */
+			if (r32 == 0)
+				lanes_ok |= 1 << lane;
+		}
+		ctr++;
+		if (lanes_ok == ((1 << NUM_LANES) - 1))
+			break;
+	}
+
+	ctrl->timings[channel][slotrank] = saved_rt;
+	return lanes_ok != ((1 << NUM_LANES) - 1);
+}
+
+/*
+ * Test data written to memory by fill_pattern5 (): each row is sixteen
+ * 32-bit words.  The first two rows alternate pairs of all-zero and
+ * all-one words; the remaining rows appear to be arbitrary (pseudo-random)
+ * values.
+ */
+const u32 pattern[][16] = {
+  { 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 
+    0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 
+    0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 
+    0x00000000, 0x00000000, 0xffffffff, 0xffffffff }, 
+  { 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 
+    0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 
+    0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 
+    0xffffffff, 0xffffffff, 0x00000000, 0x00000000 }, 
+  { 0xe62d6424, 0x9277e09e, 0x8f43dc3f, 0x76eae589,
+    0x0010fdc6, 0xdc55e01c, 0x5effb0ab, 0x6cba5d29,
+    0xa43d1e64, 0xab5c2e0f, 0x7796ed16, 0x96023bf4,
+    0xa74c831d, 0x90f138c0, 0x17830a8a, 0x5ac17c47 },
+  { 0x359ebbeb, 0x2b9b4512, 0xef584d98, 0x106bf7cb,
+    0x363525ad, 0xb3a4dfdc, 0xa6b9fcd8, 0xd21689ec,
+    0x84a3695b, 0xbd9c2e27, 0xdb3d0f44, 0x988158f1,
+    0xcca91d3f, 0xb62a6d12, 0xe905e4cf, 0x7f1fa626 },
+  { 0xe58efeae, 0xcd006081, 0xa9119403, 0xbcfbd35f,
+    0x213b3bf7, 0x7bfcb773, 0xc85143f9, 0x0bdbff50,
+    0xa3053c90, 0x51d66cb7, 0x296f4387, 0xb715f99e,
+    0xfaddc989, 0xbb1de8a7, 0x39206b4d, 0x80174a57 },
+  { 0xa1622ac1, 0xb4f4a5f0, 0x16dc2bc3, 0x50fb0954,
+    0x2e261721, 0x52b82c3c, 0x821902b8, 0x0d4b6c38,
+    0x1f618631, 0x047956f3, 0xd4337f5a, 0x591f8002,
+    0x27f28db2, 0xfae37369, 0xb3f27580, 0x3cdb6397 },
+  { 0x3dee23be, 0x19f36408, 0x227f4a6a, 0x024603c5,
+    0xd5e062db, 0x6d8d4c5c, 0x7ff693b0, 0x76641be9,
+    0x9e74f41c, 0xe7bc7f33, 0x2636f2e9, 0x70279750,
+    0xce2355aa, 0x32d230ef, 0x22f9b468, 0xadd4e7a2 },
+  { 0x936c0fed, 0xba0612d5, 0xa97c1ea7, 0x10e29d67,
+    0x1c4c5dc8, 0x83645621, 0xcd8b521c, 0xb8301817,
+    0xac7d6571, 0xcc41d200, 0x4ebdefdd, 0xd2917bde,
+    0x60f75acc, 0x7791534b, 0x26ea2a83, 0x6b74513a },
+  { 0xd1957b85, 0xc6f8f9ca, 0xf04fb4be, 0xfeb786fb,
+    0xa1dea3aa, 0x67fe7db6, 0x25d49c87, 0xe3d54870,
+    0x93dc1f86, 0x7d0c1a18, 0x9272e128, 0x68e1b876,
+    0xce284c9e, 0x8fa18792, 0x5785a340, 0xb6fcf198 },
+  { 0xff7d8e4a, 0x0c21ee43, 0xe820b388, 0xb4443c0e,
+    0xa1e6e498, 0x5c426110, 0x1b434ef3, 0xbef05b91,
+    0xa6907968, 0x53662ac3, 0x6defac32, 0x2c11c29c,
+    0x6175cced, 0xb17dd3ad, 0x6e6a1076, 0x1372b1fa },
+  { 0x4408ed06, 0x49460ffd, 0xb49d26cb, 0x6a3662a5,
+    0x5e857047, 0xa387cd4a, 0x04edc81e, 0xfd94d8d4,
+    0x2fe48d91, 0x9d2356bc, 0x96131878, 0xaca3fce4,
+    0xbb312c6c, 0x5023b090, 0x3614be70, 0xa14dfabb },
+  { 0xd4cc1e83, 0x757a1930, 0xc3d16a61, 0x9e0d6681,
+    0x8a081fa9, 0xbd11c888, 0x1672f010, 0xa083f71c,
+    0x1ec02eef, 0xc4586ca8, 0x6d322b35, 0x56054679,
+    0x1552a0ff, 0x5cb7707e, 0xdfb55d4a, 0xcc76cc07 },
+  { 0x507cf71f, 0x2166421a, 0x54be4af0, 0xfd42158c,
+    0x417b1f7f, 0x9466860b, 0x3a0075bf, 0x2055575c,
+    0xcedfe7ab, 0xbe85aa5f, 0x39d0c2e3, 0x851c19df,
+    0x39a35a3f, 0x3fb10d7d, 0x20b14899, 0x703b7f08 },
+  { 0x8a7d9dd1, 0x33235565, 0xbd3d2e57, 0xa48c2726,
+    0x0d5e2e13, 0xae421ff9, 0x8784a224, 0xf66c1510,
+    0x057627aa, 0x8fb0cb41, 0x4289975a, 0xb181adfa,
+    0x59f2059a, 0xe86feb05, 0x84222fc1, 0x319b3ce9 },
+  { 0xe1e243b8, 0x3b0bcc1a, 0x70396f00, 0x5caff44d,
+    0xe96961b3, 0xad73f692, 0x8b841a2d, 0xf5838839,
+    0xec9c9d04, 0xcc2b5562, 0xf8ca2549, 0xa9c52ff8,
+    0x3b2fde68, 0x3d4dc7f0, 0xa57387d0, 0x051199ad },
+  { 0x5f0ce4fc, 0xd830fbb7, 0x90abeb8f, 0x96d9cdbb,
+    0x58f80a80, 0x0baaca36, 0x81a23623, 0x77127614,
+    0xaa8382cd, 0x0922fbca, 0xd84d37e1, 0x721297df,
+    0x160f3b3a, 0x10a1ecdc, 0x151c92f4, 0xc1fdcdab },
+  { 0x261c45cc, 0xfeddd2da, 0xfc3cb1c1, 0x6639641f,
+    0x2c011892, 0x7108bee2, 0x8545e0b9, 0x7dd36dab,
+    0x07d91950, 0x1520adcb, 0xf84aa939, 0x07d9bb2d,
+    0xdf1ed826, 0xaee3c814, 0x1dca1e81, 0xc8e9f486 },
+  { 0x933d306a, 0xaab7103d, 0xa8be37be, 0x49612f3a,
+    0xb0cf28e5, 0xf9648902, 0x106d7c11, 0xf32e1813,
+    0x21af36ef, 0xe695e4c4, 0x7ee1831d, 0x2aeda467,
+    0x99d0c655, 0x3f0691ab, 0xcd68f7c1, 0xb469a20e },
+  { 0x8557aef0, 0x3eb0e373, 0x0853ac31, 0xe5bded62,
+    0x3eddb0dd, 0x6bbf1caf, 0x2119c3d9, 0xe1732350,
+    0x55456c75, 0xf6119375, 0x498dd1ad, 0x13f80916,
+    0xb97f9f5e, 0x921d9f4c, 0xabdee367, 0x1d6bb8bf },
+  { 0xd165a3be, 0xd8b41598, 0xa20e1809, 0xefd5c8ce,
+    0x18935c80, 0xdf1911f9, 0xc9e449eb, 0xb887a4d7,
+    0x4a324f6f, 0x533e8031, 0x1c21c074, 0xa95f1ea5,
+    0x765b320a, 0x839d7dfb, 0xc7d3aa93, 0xe534ae3d },
+  { 0xbe8592c8, 0x068457e6, 0x89b94fa3, 0xd522ad02,
+    0x7e7db0b7, 0x2c5b896f, 0x9f8ecb37, 0x05b983ff,
+    0x3fe9b25f, 0x34a6215b, 0x0592ba34, 0xd564f85a,
+    0x156c426d, 0x25ad5460, 0xe7b5e8b7, 0xa73285c6 },
+  { 0x5ad8d838, 0x27b42d36, 0xcc806ad1, 0x157a058a,
+    0x7297735a, 0xffd6df8d, 0xff96f7a2, 0x155b27ea,
+    0x84708101, 0x979fd78b, 0x49797d0c, 0x0dc93e3c,
+    0x20287332, 0xed759f88, 0xe5068529, 0xb83aa781 },
+  { 0xc38b302c, 0x57b54075, 0xac810692, 0xb0d493e7,
+    0x4adda486, 0x0665ce2e, 0xb2a9c003, 0xafacc4ce,
+    0x4d5e906d, 0xb3d52fab, 0xe6962c6b, 0x850f4dd1,
+    0x5021656c, 0x5df6c06b, 0x9255125b, 0x2363c478 },
+  { 0x188b715c, 0xe8b884b0, 0x5e6d0b9a, 0x1f0051e1,
+    0xd2d35d4c, 0xbfeaecbe, 0xc84bb0ad, 0x67a232d6,
+    0x99001587, 0xbf4313e1, 0x74f64061, 0x2c1fc562,
+    0xb6fe8ca6, 0x5226a239, 0xf5198574, 0x61b51dca },
+  { 0x51dcecd3, 0xbadbe596, 0xebe3e84a, 0x772bfdfc,
+    0x03656ac5, 0xa7c36e91, 0x6cd32cf0, 0xc3f699dd,
+    0x7d5aba01, 0x51e38e82, 0x23103a98, 0x20298b9d,
+    0x19436510, 0x63ad7e6c, 0x8bc2b33f, 0x27079917 },
+  { 0x8bd5be78, 0xf2403bfa, 0x780ebdb6, 0x94c53b64,
+    0x6241c2e2, 0x5bfb081e, 0x6799e88f, 0xc997b7d1,
+    0x466ac8b1, 0xbf5909da, 0x497ea39f, 0x402ffb48,
+    0xd7470c2d, 0x8510aba9, 0x6c52a1c9, 0x812ca967 },
+  { 0x031f7ab4, 0xd32fe890, 0x36ae6de5, 0x083dcde4,
+    0x99a7f12f, 0xe44864a7, 0x02b75fff, 0xf25dda35,
+    0x7679ff4f, 0xed421e01, 0xd9c2cfa1, 0xd36b4e82,
+    0x5315d908, 0xc7ebcb2a, 0xb6f3e4c1, 0xf5bfbae9 },
+  { 0x3f4a2a96, 0x64d8bd5a, 0x19acd70d, 0xf62fcdd9,
+    0x5de99cdf, 0x32f3b7cb, 0x2c020578, 0x4e9bafb8,
+    0x74919a08, 0xaba33e91, 0xa6bd2254, 0x2435a9b9,
+    0x47e2a1b4, 0xe837a28e, 0xe113f1b0, 0x7654bd79 },
+  { 0x05537a6c, 0x77be1a5c, 0x4c7492c9, 0x9086bfb0,
+    0x257adc18, 0xf4787fc1, 0xe3fb6d53, 0x9525e589,
+    0x445a65bc, 0x833f7d08, 0x69cf1f7e, 0x9a6372e1,
+    0xceedb52e, 0x31032997, 0xd1c36828, 0x132772d6 },
+  { 0x0a166972, 0x89beaf3b, 0x8d780fbc, 0x8aea5392,
+    0x58347a41, 0x1e381ec2, 0xcc6280c8, 0xee0863e1,
+    0x976e2dd2, 0x8c6ee6e2, 0xa0ca57cd, 0x95114a7d,
+    0x3c096704, 0xa941769d, 0x2de20c05, 0x0bf8f812 },
+  { 0x22779d6c, 0x94e12e8f, 0x5ce40299, 0xea1b55b0,
+    0x9ebec05d, 0xe076cd2b, 0x8fef5648, 0x6a284c65,
+    0xa790b705, 0xf0b19997, 0x0d8ca8af, 0x17440419,
+    0xef4f702f, 0x33cbcbb1, 0x83d60f26, 0x48988397 },
+  { 0x0fed7f53, 0xb5acbb67, 0xc031c73f, 0x5364d9ef,
+    0xa6dbd12d, 0x82174a6c, 0xccf8e7ab, 0xc473c036,
+    0xcff493d8, 0xad9afc3b, 0x316a24e8, 0x1842bea4,
+    0x4cc0c82e, 0x28ccd91e, 0xd7311b5d, 0x50a89860 },
+};
+
+/*
+ * Copy every row of the pattern table to memory.  Row i is written as
+ * sixteen 32-bit words starting at 0x04000000 + channel offset +
+ * i * channel step, where the channel offset is 0x40 bytes per channel
+ * counted by get_precedening_channels () and the channel step is 0x40
+ * bytes per populated channel.  An sfence follows the stores.
+ */
+static void fill_pattern5 (ramctr_timing *ctrl, int channel)
+{
+	unsigned row, word;
+	unsigned channel_offset = 0x40 * get_precedening_channels (ctrl, channel);
+	unsigned channel_step = num_of_channels (ctrl) * 0x40;
+
+	for (row = 0; row < sizeof (pattern) / sizeof (pattern[0]); row++) {
+		unsigned row_base = 0x04000000 + channel_offset + row * channel_step;
+
+		for (word = 0; word < 16; word++)
+			write32 (row_base + word * 4, pattern[row][word]);
+	}
+
+	sfence ();
+}
+
+
+/*
+ * Re-initialise the DRAM after the timings were reprogrammed.
+ *
+ * On an existing rank of every populated channel a one-command sequence is
+ * run and bit 21 of 0x4020 is set; bit 3 of 0x5030 is then cleared and the
+ * sequence is run again.  Afterwards a JEDEC reset and the MRS commands
+ * are issued and bit 5 of 0x5030 is pulsed.
+ * NOTE(review): unlike write_training (), nothing in this function sets
+ * bit 3 of 0x5030 or clears bit 21 of 0x4020 again -- confirm that the
+ * callees or callers take care of it.
+ */
+static void
+reprogram_320c (ramctr_timing *ctrl)
+{
+	int channel, slotrank;
+	u32 r32;
+
+	FOR_ALL_POPULATED_CHANNELS {
+		wait_428c (channel);
+
+		/* choose an existing rank.  */
+		slotrank = !ctrl->rankmap[channel][0] ? 2 : 0;
+
+		write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x0f003);
+		write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001);
+
+		write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, (slotrank << 24) | 0x60000);
+
+		write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x3e0);
+
+		write32 (DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1);
+		wait_428c (channel);
+		write32 (DEFAULT_MCHBAR + 0x4020 + channel * 0x400,
+			 read32 (DEFAULT_MCHBAR + 0x4020 + channel * 0x400) | 0x200000);
+	}
+  write32 (DEFAULT_MCHBAR + 0x5030,
+	   read32 (DEFAULT_MCHBAR + 0x5030) & ~8);
+  FOR_ALL_POPULATED_CHANNELS {
+	  wait_428c (channel);
+
+	  /* choose an existing rank.  */
+	  slotrank = !ctrl->rankmap[channel][0] ? 2 : 0;
+
+	  write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x0f003);
+	  write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001);
+
+	  write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, (slotrank << 24) | 0x60000);
+
+	  write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x3e0);
+
+	  write32 (DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1);
+	  wait_428c (channel);
+  }
+
+  /* jedec reset */
+  dram_jedecreset(ctrl);
+  /* mrs commands. */
+  dram_mrscommands(ctrl);
+
+  /* Pulse bit 5 of 0x5030: set, wait 1us, clear, wait 1us.  */
+  r32 = read32 (DEFAULT_MCHBAR + 0x5030);
+  write32 (DEFAULT_MCHBAR + 0x5030, r32 | 0x20);
+  udelay (1);
+
+  write32 (DEFAULT_MCHBAR + 0x5030, r32 & ~0x20);
+
+  udelay (1);
+}
+
+/*
+ * Command training: find val_320c for every populated rank.
+ *
+ * Bits 31:30 of register 0x4004 are set to reg_4004_b30 on every populated
+ * channel and val_4024 of every populated rank is reduced by a matching
+ * delta.  After the test pattern has been written with fill_pattern5 (),
+ * val_320c is swept over -127..127 per channel; at each step the timings
+ * are programmed, reprogram_320c () is run and test_320c () is called for
+ * every populated rank (0 means pass).  Each rank's val_320c is set to the
+ * middle of the longest run of passing values.  Dies when
+ * get_longest_zero_run () reports rn.all.
+ */
+static void
+command_training (ramctr_timing *ctrl)
+{
+  int channel;
+  int slotrank;
+  u32 reg_4004_b30;
+  int delta = 0;
+  int c320c;
+  int stat[NUM_SLOTRANKS][256];
+
+  /* FIXME: vendor BIOS discovers this by trying 0 and 2. Apparently 2 should work for
+     all systems but 0 is slightly more efficient for the systems that can tolerate it.
+   */
+  reg_4004_b30 = 2;
+
+  /* The mask is built from an unsigned constant: 3 << 30 does not fit in a
+     signed int, so shifting a plain 3 would be undefined behaviour.  */
+  FOR_ALL_POPULATED_CHANNELS
+      /* FIXME: avoid unnecessary readback.  */
+      MCHBAR32 (0x4004 + 0x400 * channel) =
+	(MCHBAR32 (0x4004 + 0x400 * channel) & ~(3u << 30))
+	| (reg_4004_b30 << 30);
+
+  if (reg_4004_b30 == 2)
+	  delta = 2;
+  else if (reg_4004_b30 == 0)
+	  delta = 4;
+
+  FOR_ALL_CHANNELS
+  {
+	  FOR_ALL_POPULATED_RANKS
+		  ctrl->timings[channel][slotrank].val_4024 -= delta;
+  }
+
+  FOR_ALL_POPULATED_CHANNELS
+  {
+	  fill_pattern5 (ctrl, channel);
+	  write32 (DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x1f);
+  }
+
+  FOR_ALL_POPULATED_CHANNELS
+  {
+	  /* stat[][] is indexed by c320c + 127, i.e. 0..254.  */
+	  for (c320c = -127; c320c <= 127; c320c++)
+	  {
+		  FOR_ALL_POPULATED_RANKS
+			  ctrl->timings[channel][slotrank].val_320c = c320c;
+		  program_timings (ctrl, channel);
+		  reprogram_320c (ctrl);
+		  FOR_ALL_POPULATED_RANKS
+		  {
+			  stat[slotrank][c320c + 127] = test_320c (ctrl, channel, slotrank);
+			  printk (BIOS_ERR, "3stat: %d, %d, %d: %d\n", channel, slotrank, c320c,
+				  stat[slotrank][c320c + 127]);
+		  }
+	  }
+	  FOR_ALL_POPULATED_RANKS
+	  {
+		  struct run rn = get_longest_zero_run (stat[slotrank], 255);
+		  ctrl->timings[channel][slotrank].val_320c = rn.middle - 127;
+		  printk (BIOS_ERR, "3val: %d, %d: %d\n", channel, slotrank,
+			  ctrl->timings[channel][slotrank].val_320c);
+		  if (rn.all)
+			  die ("c320c discovery failed");
+	  }
+  }
+
+  /* Apply the chosen values and re-initialise the DRAM once more.  */
+  FOR_ALL_POPULATED_CHANNELS
+	  program_timings (ctrl, channel);
+
+  reprogram_320c(ctrl);
+}
+
+/*
+ * Find the per-lane edge timing for one channel/rank.
+ *
+ * The rising and falling timings of all lanes are swept together over
+ * 0..MAX_EDGE_TIMING.  At each step the per-lane register at 0x4340 is
+ * zeroed, a four-command sequence is run and the register is read back.
+ * edges[lane] receives the middle of the longest run of zero readings; the
+ * caller must supply room for NUM_LANES entries.  Dies when
+ * get_longest_zero_run () reports rn.all.
+ *
+ * ctrl->timings is left holding the last swept value, not the result.
+ */
+static void
+discover_edges_real (ramctr_timing *ctrl, int channel, int slotrank, int *edges)
+{
+  int edge;
+  int statistics[NUM_LANES][MAX_EDGE_TIMING + 1];
+  int lane;
+
+  for (edge = 0; edge <= MAX_EDGE_TIMING; edge++) {
+	  FOR_ALL_LANES
+	  {
+		  ctrl->timings[channel][slotrank].lanes[lane].rising = edge;
+		  ctrl->timings[channel][slotrank].lanes[lane].falling = edge;
+	  }
+	  printk (BIOS_ERR, "edge %02x\n", edge);
+	  program_timings (ctrl, channel);
+
+	  /* Zero the per-lane result register; the value read from 0x4140
+	     is discarded.  */
+	  FOR_ALL_LANES
+	  {
+		  write32 (DEFAULT_MCHBAR + 0x4340 + 0x400 * channel + 4 * lane, 0);
+		  read32 (DEFAULT_MCHBAR + 0x400 * channel + 4 * lane + 0x4140);
+	  }
+
+	  wait_428c (channel);
+
+	  /* Four command slots, started with 0xc0001 below.  */
+	  write32 (DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f000);
+	  write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, (0xc01 | (ctrl->delay1 << 16)));
+	  write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, (slotrank << 24) | 0x360004);
+	  write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0);
+
+	  write32 (DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f105);
+	  write32 (DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x40411f4);
+	  write32 (DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, (slotrank << 24));
+	  write32 (DEFAULT_MCHBAR + 0x4214 + channel * 0x400, 0);
+
+	  write32 (DEFAULT_MCHBAR + 0x4228 + channel * 0x400, 0x1f105);
+	  write32 (DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x1001 | ((ctrl->CAS + 8) << 16));
+	  write32 (DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24) | 0x60000);
+	  write32 (DEFAULT_MCHBAR + 0x4218 + channel * 0x400, 0);
+
+	  write32 (DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f000);
+	  write32 (DEFAULT_MCHBAR + 0x423c + 0x400 * channel, (0xc01 | (ctrl->delay1 << 16)));
+	  write32 (DEFAULT_MCHBAR + 0x420c + 0x400 * channel, (slotrank << 24) | 0x360000);
+	  write32 (DEFAULT_MCHBAR + 0x421c + channel * 0x400, 0);
+
+	  write32 (DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);
+
+	  wait_428c (channel);
+
+	  FOR_ALL_LANES
+	  {
+		  statistics[lane][edge] = read32 (DEFAULT_MCHBAR + 0x4340 + 0x400 * channel + lane * 4);
+		  printk (BIOS_ERR, "estat %d, %d, %d, %d %02x\n", channel, slotrank, lane, edge, statistics[lane][edge]);
+	  }
+  }
+  FOR_ALL_LANES
+  {
+	  struct run rn = get_longest_zero_run (statistics[lane], MAX_EDGE_TIMING + 1);
+	  edges[lane] = rn.middle;
+	  if (rn.all)
+		  die ("edge discovery failed");
+	  printk (BIOS_ERR, "eval %d, %d, %d, %02x\n", channel, slotrank, lane, edges[lane]);
+  }
+}
+
+/*
+ * Determine the rising and falling edge timings of every populated rank and
+ * program them into the controller.
+ *
+ * Steps, in the order performed below:
+ *  - write 0 to MCHBAR 0x3400 and pulse bit 0x20 of MCHBAR 0x5030
+ *    (set, 1 us, clear, 1 us);
+ *  - zero the per-lane registers at 0x4080 on every populated channel;
+ *  - per populated channel: run the same command sequence on every populated
+ *    rank twice, first with rising/falling forced to 16 and then to 48, and
+ *    afterwards load each per-lane 0x4080 register with the inverted low byte
+ *    of the matching 0x4040 register;
+ *  - call discover_edges_real() for every populated rank with 0x4eb0 = 0x300
+ *    (results are stored as the falling edges) and again with 0x4eb0 = 0x200
+ *    (results are stored as the rising edges);
+ *  - copy the results into ctrl->timings, program them and zero the per-lane
+ *    0x4080 registers again.
+ */
+static void
+discover_edges (ramctr_timing *ctrl)
+{
+	int falling_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
+	int rising_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
+	int channel, slotrank, lane;
+	u32 r32;
+
+	write32 (DEFAULT_MCHBAR + 0x3400, 0);
+
+	/* Pulse bit 0x20 of 0x5030: set it, wait, then clear it again.  */
+	r32 = read32 (DEFAULT_MCHBAR + 0x5030);
+	write32 (DEFAULT_MCHBAR + 0x5030, r32 | 0x20);
+	udelay (1);
+
+	write32 (DEFAULT_MCHBAR + 0x5030, r32 & ~0x20);
+
+	udelay (1);
+
+	FOR_ALL_POPULATED_CHANNELS
+	{
+		FOR_ALL_LANES
+			write32 (DEFAULT_MCHBAR + 4 * lane + 0x400 * channel + 0x4080, 0);
+	}
+
+	FOR_ALL_POPULATED_CHANNELS
+	{
+		fill_pattern0 (ctrl, channel, 0, 0);
+		write32 (DEFAULT_MCHBAR | 0x4288 | (channel << 10), 0);
+		/* The values read here are discarded; only the read itself is performed.  */
+		FOR_ALL_LANES
+			read32 (DEFAULT_MCHBAR + 0x400 * channel + lane * 4 + 0x4140);
+
+		/* First pass: both edges of every lane forced to 16.  */
+		FOR_ALL_POPULATED_RANKS
+			FOR_ALL_LANES
+		{
+			ctrl->timings[channel][slotrank].lanes[lane].falling = 16;
+			ctrl->timings[channel][slotrank].lanes[lane].rising = 16;
+		}
+
+		program_timings (ctrl, channel);
+
+		FOR_ALL_POPULATED_RANKS
+		{
+			wait_428c (channel);
+
+			write32 (DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f000);
+			write32 (0x4230 + 0x400 * channel + DEFAULT_MCHBAR, 0xc01 | (ctrl->delay1 << 16));
+			write32 (DEFAULT_MCHBAR + 0x4200 + channel * 0x400, (slotrank << 24) | 0x360004);
+			write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0);
+
+			write32 (DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f105);
+			write32 (0x4234 + 0x400 * channel + DEFAULT_MCHBAR, 0x4041003);
+			write32 (DEFAULT_MCHBAR + 0x4204 + channel * 0x400, (slotrank << 24) | 0);
+			write32 (DEFAULT_MCHBAR + 0x4214 + channel * 0x400, 0);
+
+			write32 (DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f105);
+			write32 (0x4238 + 0x400 * channel + DEFAULT_MCHBAR, 0x1001 | ((ctrl->CAS + 8) << 16));
+			write32 (DEFAULT_MCHBAR + 0x4208 + channel * 0x400, (slotrank << 24) | 0x60000);
+			write32 (DEFAULT_MCHBAR + 0x4218 + channel * 0x400, 0);
+
+			write32 (DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f000);
+			write32 (0x423c + 0x400 * channel + DEFAULT_MCHBAR, 0xc01 | (ctrl->delay1 << 16));
+			write32 (DEFAULT_MCHBAR + 0x420c + channel * 0x400, (slotrank << 24) | 0x360000);
+			write32 (DEFAULT_MCHBAR + 0x421c + channel * 0x400, 0);
+			write32 (DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);
+
+			wait_428c (channel);
+		}
+
+		/* Second pass: same command sequence with both edges forced to 48.  */
+		FOR_ALL_POPULATED_RANKS
+			FOR_ALL_LANES
+		{
+			ctrl->timings[channel][slotrank].lanes[lane].falling = 48;
+			ctrl->timings[channel][slotrank].lanes[lane].rising = 48;
+		}
+
+		program_timings (ctrl, channel);
+
+		FOR_ALL_POPULATED_RANKS
+		{
+			wait_428c (channel);
+
+			write32 (DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f000);
+			write32 (0x4230 + 0x400 * channel + DEFAULT_MCHBAR, 0xc01 | (ctrl->delay1 << 16));
+			write32 (DEFAULT_MCHBAR + 0x4200 + channel * 0x400, (slotrank << 24) | 0x360004);
+			write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0);
+
+			write32 (DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f105);
+			write32 (0x4234 + 0x400 * channel + DEFAULT_MCHBAR, 0x4041003);
+			write32 (DEFAULT_MCHBAR + 0x4204 + channel * 0x400, (slotrank << 24) | 0);
+			write32 (DEFAULT_MCHBAR + 0x4214 + channel * 0x400, 0);
+
+			write32 (DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f105);
+			write32 (0x4238 + 0x400 * channel + DEFAULT_MCHBAR, 0x1001 | ((ctrl->CAS + 8) << 16));
+			write32 (DEFAULT_MCHBAR + 0x4208 + channel * 0x400, (slotrank << 24) | 0x60000);
+			write32 (DEFAULT_MCHBAR + 0x4218 + channel * 0x400, 0);
+
+			write32 (DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f000);
+			write32 (0x423c + 0x400 * channel + DEFAULT_MCHBAR, 0xc01 | (ctrl->delay1 << 16));
+
+			write32 (DEFAULT_MCHBAR + 0x420c + channel * 0x400, (slotrank << 24) | 0x360000);
+			write32 (DEFAULT_MCHBAR + 0x421c + channel * 0x400, 0);
+
+			write32 (DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);
+			wait_428c (channel);
+		}
+
+		/* Load each lane's 0x4080 register with the inverted low byte of its 0x4040 register.  */
+		FOR_ALL_LANES
+		{
+			write32 (DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + lane * 4,
+				~read32 (DEFAULT_MCHBAR + 0x4040 + 0x400 * channel + lane * 4) & 0xff);
+		}
+
+		fill_pattern0 (ctrl, channel, 0, 0xffffffff);
+		write32 (DEFAULT_MCHBAR | 0x4288 | (channel << 10), 0);
+	}
+
+	/* FIXME: under some conditions (older chipsets?) vendor BIOS sets both edges to the same value.  */
+	/* Results obtained with 0x4eb0 = 0x300 are stored as the falling edges.  */
+	write32 (DEFAULT_MCHBAR + 0x4eb0, 0x300);
+
+	FOR_ALL_CHANNELS
+		FOR_ALL_POPULATED_RANKS
+	{
+		discover_edges_real (ctrl, channel, slotrank, falling_edges[channel][slotrank]);
+	}
+
+	/* Results obtained with 0x4eb0 = 0x200 are stored as the rising edges.  */
+	write32 (DEFAULT_MCHBAR + 0x4eb0, 0x200);
+
+	FOR_ALL_CHANNELS
+		FOR_ALL_POPULATED_RANKS
+	{
+		discover_edges_real (ctrl, channel, slotrank, rising_edges[channel][slotrank]);
+	}
+
+	write32 (DEFAULT_MCHBAR + 0x4eb0, 0);
+
+	/* Commit the discovered edges to the timing table and to the hardware.  */
+	FOR_ALL_CHANNELS
+		FOR_ALL_POPULATED_RANKS
+		FOR_ALL_LANES
+	{
+			ctrl->timings[channel][slotrank].lanes[lane].falling = falling_edges[channel][slotrank][lane];
+			ctrl->timings[channel][slotrank].lanes[lane].rising = rising_edges[channel][slotrank][lane];
+	}
+
+
+	FOR_ALL_POPULATED_CHANNELS
+		program_timings (ctrl, channel);
+
+	FOR_ALL_CHANNELS
+	    FOR_ALL_POPULATED_RANKS
+		FOR_ALL_LANES
+	{
+		write32 (0x4080 + 4 * lane + 0x400 * channel + DEFAULT_MCHBAR, 0);
+	}
+}
+
+/*
+ * Sweep the edge setting of one rank and derive a per-lane edge value.
+ *
+ * For each of three values written to bits 31:24 of register
+ * 0x3000 + 0x100 * channel, both edges of every lane are stepped from 0 to
+ * MAX_EDGE_TIMING; after each step a test sequence is run and the per-lane
+ * status bits are read from 0x436c + 0x400 * channel.  The longest run of
+ * zero bits per lane (presumably the passing window) is narrowed by
+ * ctrl->edge_offset[i] on both sides and intersected over the three
+ * iterations; edges[lane] receives the midpoint of that intersection.
+ *
+ * ctrl:     controller state; ctrl->timings of this rank is overwritten
+ *           during the sweep.
+ * channel:  channel to train.
+ * slotrank: rank within the channel.
+ * edges:    output array with NUM_LANES entries.
+ *
+ * Dies if a lane has no passing setting at all, matching
+ * discover_edges_real() and discover_timC_write().
+ */
+static void
+discover_edges_write_real (ramctr_timing *ctrl, int channel, int slotrank, int *edges)
+{
+	int edge;
+	u32 raw_statistics[MAX_EDGE_TIMING + 1];
+	int statistics[MAX_EDGE_TIMING + 1];
+	const int reg3000b24[] = {0, 0xc, 0x2c};
+	int lane, i;
+	int lower[NUM_LANES];
+	int upper[NUM_LANES];
+
+	FOR_ALL_LANES
+	{
+		lower[lane] = 0;
+		upper[lane] = MAX_EDGE_TIMING;
+	}
+
+	for (i = 0; i < 3; i++) {
+		/* FIXME: trace shows that vendor BIOS also tests with other patterns.
+		   I'm not sure whether it's really needed.
+		*/
+		write32 (DEFAULT_MCHBAR + 0x3000 + 0x100 * channel,
+			 reg3000b24[i] << 24);
+		printk (BIOS_ERR, "patterned\n");
+		printk (BIOS_ERR, "[%x] = 0x%08x\n(%d, %d)\n",0x3000 + 0x100 * channel,
+			reg3000b24[i] << 24, channel, slotrank);
+		for (edge = 0; edge <= MAX_EDGE_TIMING; edge++) {
+			FOR_ALL_LANES
+			{
+				ctrl->timings[channel][slotrank].lanes[lane].rising = edge;
+				ctrl->timings[channel][slotrank].lanes[lane].falling = edge;
+			}
+			program_timings (ctrl, channel);
+
+			FOR_ALL_LANES
+			{
+				write32 (DEFAULT_MCHBAR + 0x4340 + 0x400 * channel + 4 * lane, 0);
+				read32 (DEFAULT_MCHBAR + 0x400 * channel + 4 * lane + 0x4140);
+			}
+			wait_428c (channel);
+
+			write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x1f006);
+			write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x4 | (ctrl->tRCD << 16)
+				 | (max (ctrl->tRRD,
+					 (ctrl->tFAW >> 2) + 1) << 10));
+			write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, (slotrank << 24) | 0x60000);
+			write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x240);
+
+			write32 (DEFAULT_MCHBAR + 0x4224 + channel * 0x400, 0x1f201);
+			write32 (DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x8005020 | ((ctrl->tWTR + ctrl->CWL + 8) << 16));
+			write32 (DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, (slotrank << 24));
+			write32 (DEFAULT_MCHBAR + 0x4214 + channel * 0x400, 0x242);
+
+			write32 (DEFAULT_MCHBAR + 0x4228 + channel * 0x400, 0x1f105);
+			write32 (DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x4005020 | (max (ctrl->tRTP, 8) << 16));
+			write32 (DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24));
+			write32 (DEFAULT_MCHBAR + 0x4218 + channel * 0x400, 0x242);
+
+			write32 (DEFAULT_MCHBAR + 0x422c + channel * 0x400, 0x1f002);
+			write32 (DEFAULT_MCHBAR + 0x423c + 0x400 * channel, 0xc01 | (ctrl->tRP << 16));
+			write32 (DEFAULT_MCHBAR + 0x420c + 0x400 * channel, (slotrank << 24) | 0x60400);
+			write32 (DEFAULT_MCHBAR + 0x421c + channel * 0x400, 0);
+
+			write32 (DEFAULT_MCHBAR + 0x4284 + channel * 0x400, 0xc0001);
+			wait_428c (channel);
+			/* The per-lane values are read and discarded; the result is taken from 0x436c.  */
+			FOR_ALL_LANES
+			{
+				read32 (DEFAULT_MCHBAR + 0x4340 + 0x400 * channel + lane * 4);
+			}
+
+			raw_statistics[edge] = MCHBAR32 (0x436c + channel * 0x400);
+		}
+		FOR_ALL_LANES
+		{
+			struct run rn;
+			/* Extract this lane's bit from every sweep step.  */
+			for (edge = 0; edge <= MAX_EDGE_TIMING; edge++)
+				statistics[edge] = !!(raw_statistics[edge] & (1 << lane));
+			rn = get_longest_zero_run (statistics, MAX_EDGE_TIMING + 1);
+			/* Without any zero entry there is no usable window; fail like
+			   the sibling discovery routines instead of using rn blindly.  */
+			if (rn.all)
+				die ("edge write discovery failed");
+			printk (BIOS_ERR, "edges: %d, %d, %d: 0x%x-0x%x-0x%x, 0x%x-0x%x\n", channel, slotrank, i,
+				rn.start, rn.middle, rn.end,
+				rn.start + ctrl->edge_offset[i],
+				rn.end - ctrl->edge_offset[i]);
+			lower[lane] = max (rn.start + ctrl->edge_offset[i], lower[lane]);
+			upper[lane] = min (rn.end - ctrl->edge_offset[i], upper[lane]);
+			edges[lane] = (lower[lane] + upper[lane]) / 2;
+		}
+	}
+
+	/* Reset the register that was programmed above, i.e. the one belonging
+	   to this channel, not unconditionally the channel 0 instance.  */
+	write32 (DEFAULT_MCHBAR + 0x3000 + 0x100 * channel, 0);
+	printk (BIOS_ERR, "CPA\n");
+}
+
+/*
+ * Write mode to register 0x4eb0 and run discover_edges_write_real() on every
+ * populated rank, storing the per-lane results in edges.
+ */
+static void
+discover_edges_write_pass (ramctr_timing *ctrl, u32 mode,
+			   int edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES])
+{
+	int channel, slotrank;
+
+	write32 (DEFAULT_MCHBAR + 0x4eb0, mode);
+
+	FOR_ALL_CHANNELS
+		FOR_ALL_POPULATED_RANKS
+			discover_edges_write_real (ctrl, channel, slotrank, edges[channel][slotrank]);
+}
+
+/*
+ * Discover the edge timings of all populated ranks using the write-based
+ * test and program the result.
+ *
+ * Pattern 5 is loaded on every populated channel, then two discovery passes
+ * are run: the one with 0x4eb0 = 0x300 yields the falling edges, the one with
+ * 0x4eb0 = 0x200 the rising edges.  Afterwards 0x4eb0 is cleared, the values
+ * are copied into ctrl->timings and programmed, and the per-lane registers at
+ * 0x4080 are zeroed.
+ */
+static void
+discover_edges_write (ramctr_timing *ctrl)
+{
+	int falling_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
+	int rising_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
+	int channel, slotrank, lane;
+
+	FOR_ALL_POPULATED_CHANNELS {
+		fill_pattern5 (ctrl, channel);
+		write32 (DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x1f);
+	}
+
+	/* FIXME: under some conditions (older chipsets?) vendor BIOS sets both edges to the same value.  */
+	discover_edges_write_pass (ctrl, 0x300, falling_edges);
+	discover_edges_write_pass (ctrl, 0x200, rising_edges);
+
+	write32 (DEFAULT_MCHBAR + 0x4eb0, 0);
+
+	/* Take over the discovered values.  */
+	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
+		ctrl->timings[channel][slotrank].lanes[lane].falling = falling_edges[channel][slotrank][lane];
+		ctrl->timings[channel][slotrank].lanes[lane].rising = rising_edges[channel][slotrank][lane];
+	}
+
+	FOR_ALL_POPULATED_CHANNELS
+		program_timings (ctrl, channel);
+
+	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES
+		write32 (DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane, 0);
+}
+
+/*
+ * Train the per-lane timC value of every populated rank.
+ *
+ * With 0x4ea8 set to 1, bits 29:24 of each populated channel's 0xe3c register
+ * are stepped through three values.  For each value and each populated rank,
+ * timC is swept from 0 to MAX_TIMC; after every step a test sequence is run
+ * and the per-lane status bits are read from 0x436c + 0x400 * channel.  The
+ * longest run of zero bits per lane (presumably the passing window) is
+ * narrowed by ctrl->timC_offset[i] and intersected over the three iterations.
+ * The midpoint of the intersection becomes the lane's timC, which is finally
+ * programmed on all populated channels.
+ *
+ * Dies if a lane has no zero entry at all in one of the sweeps.
+ */
+static void
+discover_timC_write (ramctr_timing *ctrl)
+{
+	const u8 rege3c_b24[3] = { 0, 0xf, 0x2f };
+	int i;
+
+	int lower[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
+	int upper[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
+	int channel, slotrank, lane;
+
+	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES
+	{
+		lower[channel][slotrank][lane] = 0;
+		upper[channel][slotrank][lane] = MAX_TIMC;
+	}
+
+	write32 (DEFAULT_MCHBAR + 0x4ea8, 1);
+
+	for (i = 0; i < 3; i++)
+		FOR_ALL_POPULATED_CHANNELS
+		{
+			/* Replace bits 29:24 of this channel's 0xe3c register.  */
+			write32 ((channel << 8) + DEFAULT_MCHBAR + 0xe3c,
+				 (rege3c_b24[i] << 24) | (read32 ((channel << 8) + DEFAULT_MCHBAR + 0xe3c) & ~0x3f000000));
+			udelay (2);
+			FOR_ALL_POPULATED_RANKS
+			{
+				int timC;
+				u32 raw_statistics[MAX_TIMC + 1];
+				int statistics[MAX_TIMC + 1];
+
+				/* FIXME: trace shows that vendor BIOS also tests with other patterns.
+				   I'm not sure whether it's really needed.
+				*/
+				fill_pattern5 (ctrl, channel);
+				for (timC = 0; timC < MAX_TIMC + 1; timC++)
+				{
+					FOR_ALL_LANES
+						ctrl->timings[channel][slotrank].lanes[lane].timC = timC;
+					program_timings (ctrl, channel);
+					wait_428c (channel);
+					write32 (DEFAULT_MCHBAR + 0x4220 + channel * 0x400, 0x1f006);
+					write32 (DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, (max ((ctrl->tFAW >> 2) + 1,
+												  ctrl->tRRD) << 10) | (ctrl->tRCD << 16) | 4);
+					write32 (DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, (slotrank << 24) | 0x60000);
+					write32 (DEFAULT_MCHBAR + 0x4210 + channel * 0x400, 0x244);
+
+					write32 (DEFAULT_MCHBAR + 0x4224 + channel * 0x400, 0x1f201);
+					write32 (DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x80011e0 | ((ctrl->tWTR + ctrl->CWL + 8) << 16));
+					write32 (DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, (slotrank << 24));
+					write32 (DEFAULT_MCHBAR + 0x4214 + channel * 0x400, 0x242);
+
+					write32 (DEFAULT_MCHBAR + 0x4228 + channel * 0x400, 0x1f105);
+					write32 (DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x40011e0 | (max (ctrl->tRTP, 8) << 16));
+					write32 (DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24));
+					write32 (DEFAULT_MCHBAR + 0x4218 + channel * 0x400, 0x242);
+
+					write32 (DEFAULT_MCHBAR + 0x422c + channel * 0x400, 0x1f002);
+					write32 (DEFAULT_MCHBAR + 0x423c + 0x400 * channel, 0x1001 | (ctrl->tRP << 16));
+					write32 (DEFAULT_MCHBAR + 0x420c + 0x400 * channel,(slotrank << 24) | 0x60400);
+					write32 (DEFAULT_MCHBAR + 0x421c + channel * 0x400, 0);
+
+					write32 (DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001);
+					wait_428c (channel);
+					raw_statistics[timC] = MCHBAR32 (0x436c + channel * 0x400);
+					printk (BIOS_ERR, "Cstat %02x %02x\n", timC, raw_statistics[timC]);
+				}
+				FOR_ALL_LANES
+				{
+					struct run rn;
+					/* Extract this lane's bit from every sweep step.  */
+					for (timC = 0; timC <= MAX_TIMC; timC++)
+						statistics[timC] = !!(raw_statistics[timC] & (1 << lane));
+					rn = get_longest_zero_run (statistics, MAX_TIMC + 1);
+					if (rn.all)
+						die ("timC write discovery failed");
+					printk (BIOS_ERR, "timC: %d, %d, %d: 0x%x-0x%x-0x%x, 0x%x-0x%x\n", channel, slotrank, i,
+						rn.start, rn.middle, rn.end,
+						rn.start + ctrl->timC_offset[i],
+						rn.end - ctrl->timC_offset[i]);
+					lower[channel][slotrank][lane] = max (rn.start + ctrl->timC_offset[i],
+									      lower[channel][slotrank][lane]);
+					upper[channel][slotrank][lane] = min (rn.end - ctrl->timC_offset[i],
+									      upper[channel][slotrank][lane]);
+				}
+			}
+		}
+
+	/* Clear bits 29:24 of 0xe3c again on every channel that was modified
+	   above.  This has to iterate: once the loops above have finished,
+	   channel no longer names a valid channel.  */
+	FOR_ALL_POPULATED_CHANNELS
+	{
+		write32 ((channel << 8) + DEFAULT_MCHBAR + 0xe3c,
+			 0 | (read32 ((channel << 8) + DEFAULT_MCHBAR + 0xe3c) & ~0x3f000000));
+		udelay (2);
+	}
+
+	write32 (DEFAULT_MCHBAR + 0x4ea8, 0);
+
+	printk (BIOS_ERR, "CPB\n");
+
+	/* The trained value is the midpoint of the intersected window.  */
+	FOR_ALL_CHANNELS
+		FOR_ALL_POPULATED_RANKS
+			FOR_ALL_LANES
+	{
+		printk (BIOS_ERR, "timC [%d, %d, %d] = 0x%x\n", channel, slotrank, lane,
+			(lower[channel][slotrank][lane] + upper[channel][slotrank][lane]) / 2);
+		ctrl->timings[channel][slotrank].lanes[lane].timC =
+			(lower[channel][slotrank][lane] + upper[channel][slotrank][lane]) / 2;
+	}
+
+	FOR_ALL_POPULATED_CHANNELS
+		program_timings (ctrl, channel);
+}
+
+/*
+ * Shift val_4024 and val_4028 of every populated rank so that val_4028
+ * equals the rank's largest per-lane timA divided by 64, then program the
+ * timings on all populated channels.
+ *
+ * The maximum is computed per rank: it starts from 0 for each rank so that
+ * the timA values of one rank do not influence the delta of another.
+ */
+static void
+normalize_training (ramctr_timing *ctrl)
+{
+	int channel, slotrank, lane;
+
+	FOR_ALL_CHANNELS
+		FOR_ALL_POPULATED_RANKS
+	{
+		int delta;
+		int mat = 0;
+
+		FOR_ALL_LANES
+			mat = max (ctrl->timings[channel][slotrank].lanes[lane].timA, mat);
+		/* Apply the same shift to both values.  */
+		delta = (mat >> 6) - ctrl->timings[channel][slotrank].val_4028;
+		ctrl->timings[channel][slotrank].val_4024 += delta;
+		ctrl->timings[channel][slotrank].val_4028 += delta;
+	}
+
+	FOR_ALL_POPULATED_CHANNELS
+		program_timings (ctrl, channel);
+}
+
+
+/*
+ * For every populated rank, write the values returned by make_mr0() and
+ * make_mr1() to the controller registers at offsets 4 and 8 of the block
+ * selected by the channel (channel << 8) and lane_registers[slotrank].
+ */
+static void
+write_controller_mr(ramctr_timing *ctrl)
+{
+	int channel, slotrank;
+
+	FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
+		/* MR0 value first, then MR1.  */
+		write32 (DEFAULT_MCHBAR | (channel << 8) | lane_registers[slotrank] | 0x0004,
+			 make_mr0 (ctrl, slotrank));
+		write32 (DEFAULT_MCHBAR | (channel << 8) | lane_registers[slotrank] | 0x0008,
+			 make_mr1 (ctrl, slotrank));
+	}
+}
+
+/*
+ * Quick sanity test of every populated channel and rank.
+ *
+ * Stage 1 dies if bits 0xa000 of a populated channel's 0x42a0 register are
+ * set.  Stage 2 loads a fixed pattern, and for every rank present in either
+ * rank map of a channel clears the per-lane registers at 0x4f40 and 0x4d40,
+ * runs a test sequence and dies if any lane's register at
+ * 0x4340 + 4 * lane of that channel reads back non-zero.
+ */
+static void
+channel_test(ramctr_timing *ctrl)
+{
+	int channel, slotrank, lane;
+
+	FOR_ALL_POPULATED_CHANNELS
+		if (read32 (DEFAULT_MCHBAR | 0x42a0 | (channel << 10)) & 0xa000)
+			die ("Mini channel test failed (1)\n");
+	FOR_ALL_POPULATED_CHANNELS
+	{
+		fill_pattern0 (ctrl, channel, 0x12345678, 0x98765432);
+
+		write32 (DEFAULT_MCHBAR | 0x4288 | (channel << 10), 0);
+	}
+
+	for (slotrank = 0; slotrank < 4; slotrank++)
+		FOR_ALL_CHANNELS
+			if ((ctrl->rankmap[channel][0] | ctrl->rankmap[channel][1]) & (1 << slotrank))
+		{
+			FOR_ALL_LANES
+			{
+				write32 (DEFAULT_MCHBAR | (0x4f40 + 4 * lane), 0);
+				write32 (DEFAULT_MCHBAR | (0x4d40 + 4 * lane), 0);
+			}
+			wait_428c (channel);
+			write32 (DEFAULT_MCHBAR | 0x4220 | (channel << 10), 0x0001f006);
+			write32 (DEFAULT_MCHBAR | 0x4230 | (channel << 10), 0x0028a004);
+			write32 (DEFAULT_MCHBAR | 0x4200 | (channel << 10),
+				 0x00060000 | (slotrank << 24));
+			write32 (DEFAULT_MCHBAR | 0x4210 | (channel << 10), 0x00000244);
+			write32 (DEFAULT_MCHBAR | 0x4224 | (channel << 10), 0x0001f201);
+			write32 (DEFAULT_MCHBAR | 0x4234 | (channel << 10), 0x08281064);
+			write32 (DEFAULT_MCHBAR | 0x4204 | (channel << 10),
+				 0x00000000 | (slotrank << 24));
+			write32 (DEFAULT_MCHBAR | 0x4214 | (channel << 10), 0x00000242);
+			write32 (DEFAULT_MCHBAR | 0x4228 | (channel << 10), 0x0001f105);
+			write32 (DEFAULT_MCHBAR | 0x4238 | (channel << 10), 0x04281064);
+			write32 (DEFAULT_MCHBAR | 0x4208 | (channel << 10),
+				 0x00000000 | (slotrank << 24));
+			write32 (DEFAULT_MCHBAR | 0x4218 | (channel << 10), 0x00000242);
+			write32 (DEFAULT_MCHBAR | 0x422c | (channel << 10), 0x0001f002);
+			write32 (DEFAULT_MCHBAR | 0x423c | (channel << 10), 0x00280c01);
+			write32 (DEFAULT_MCHBAR | 0x420c | (channel << 10),
+				 0x00060400 | (slotrank << 24));
+			write32 (DEFAULT_MCHBAR | 0x421c | (channel << 10), 0x00000240);
+			write32 (DEFAULT_MCHBAR | 0x4284 | (channel << 10), 0x000c0001);
+			wait_428c (channel);
+			/* Check each lane's own register; testing only the lane 0
+			   register would let failures on other lanes go unnoticed.  */
+			FOR_ALL_LANES
+				if (read32 (DEFAULT_MCHBAR | (0x4340 + 4 * lane) | (channel << 10)))
+					die ("Mini channel test failed (2)\n");
+		}
+}
+
+/*
+ * Clear bit 28 of register 0x4020 and load the three fixed seed values into
+ * registers 0x4034, 0x403c and 0x4038 of every populated channel.
+ *
+ * The seed registers are addressed per channel (0x400 stride, like 0x4020),
+ * so each channel receives its own row of the table.
+ */
+static void
+set_scrambling_seed (ramctr_timing *ctrl)
+{
+	int channel;
+
+	/* FIXME: we hardcode seeds. Do we need to use some PRNG for them?
+	   I don't think so.  */
+	static const u32 seeds[2][3] = {
+		{ 0x00009a36, 0xbafcfdcf, 0x46d1ab68},
+		{ 0x00028bfa, 0x53fe4b49, 0x19ed5483}
+	};
+	FOR_ALL_POPULATED_CHANNELS
+	{
+		MCHBAR32(0x4020 + channel * 0x400) &= ~0x10000000;
+		write32 (DEFAULT_MCHBAR | (0x4034 + 0x400 * channel), seeds[channel][0]);
+		write32 (DEFAULT_MCHBAR | (0x403c + 0x400 * channel), seeds[channel][1]);
+		write32 (DEFAULT_MCHBAR | (0x4038 + 0x400 * channel), seeds[channel][2]);
+	}
+}
+
+/*
+ * Program register 0x4f8c with a value that depends on the CPU stepping:
+ * 0x141D1519 on Sandy Bridge D0/D1, 0x551D1519 on everything else.
+ */
+static void
+set_4f8c (void)
+{
+	struct cpuid_result cpures;
+	u32 cpu;
+
+	/* The family/model/stepping signature is returned in EAX by CPUID
+	   leaf 1; leaf 0 only reports the highest supported leaf there.  */
+	cpures = cpuid(1);
+	cpu = (cpures.eax);
+	if (IS_SANDY_CPU(cpu) && (IS_SANDY_CPU_D0(cpu) || IS_SANDY_CPU_D1(cpu))) {
+		MCHBAR32(0x4f8c) = 0x141D1519;
+	} else {
+		MCHBAR32(0x4f8c) = 0x551D1519;
+	}
+}
+
+/*
+ * Set bit 0x20000000 of register 0x4004 on every populated channel, wait
+ * 1 us, then call wait_428c() for each populated channel.
+ */
+static void
+prepare_training (ramctr_timing *ctrl)
+{
+	int channel;
+
+	// Always drive command bus
+	FOR_ALL_POPULATED_CHANNELS
+		MCHBAR32(0x4004 + 0x400 * channel) |= 0x20000000;
+
+	udelay (1);
+
+	FOR_ALL_POPULATED_CHANNELS {
+		wait_428c (channel);
+	}
+}
+
+/*
+ * Write fixed values to registers 0x400c and 0x4008 of every populated
+ * channel.  Register 0x400c is read once (result unused) before it is
+ * overwritten.
+ */
+static void
+hardcode1 (ramctr_timing *ctrl)
+{
+	int channel;
+
+	FOR_ALL_POPULATED_CHANNELS
+	{
+		/* Dummy read; the value seen in the trace was 0x000258b4.  */
+		read32 (DEFAULT_MCHBAR | 0x400c | (channel << 10));
+
+		/* FIXME: both values are hardcoded.  */
+		write32 (DEFAULT_MCHBAR | 0x400c | (channel << 10), 0x000058b4);
+		write32 (DEFAULT_MCHBAR | 0x4008 | (channel << 10), 0x0a042220);
+	}
+}
+
+/*
+ * For every populated channel, write 0x1000 combined with both rank maps
+ * (the second one shifted left by 2) to register 0x42a0, then clear bit
+ * 0x20000000 of register 0x4004.
+ */
+static void
+set_42a0 (ramctr_timing *ctrl)
+{
+	int channel;
+
+	FOR_ALL_POPULATED_CHANNELS
+	{
+		write32 (DEFAULT_MCHBAR | (0x42a0 + 0x400 * channel),
+			 0x00001000
+			 | ctrl->rankmap[channel][0]
+			 | (ctrl->rankmap[channel][1] << 2));
+		MCHBAR32(0x4004 + 0x400 * channel) &= ~0x20000000; // OK
+	}
+}
+
+/*
+ * Entry point of the native DDR3 memory initialisation.
+ *
+ * spds:   raw SPD data, handed to dram_find_spds_ddr3().
+ * mobile: stored in ctrl.mobile for use by the called routines.
+ *
+ * Order of operations: early ME init and UMA size query, pch_init(), an
+ * optional platform reset, SPD decoding and timing calculation, controller
+ * programming, JEDEC reset and MRS commands, the training steps, a channel
+ * test, and finally a series of fixed register writes before
+ * post_system_agent_init() and report_memory_config().
+ */
+void init_dram_ddr3(spd_raw_data *spds, int mobile)
+{
+	int me_uma_size;
+	report_platform_info();
+
+	/* Wait for ME to be ready */
+	intel_early_me_init();
+	me_uma_size = intel_early_me_uma_size();
+
+	printk(BIOS_DEBUG, "Starting native Platform init\n");
+
+	pch_init ();
+
+	u32 reg_5d10;
+
+	wait_txt_clear ();
+
+	wrmsr (0x000002e6, (msr_t) { .lo = 0, .hi = 0 });
+
+	reg_5d10 = read32 (DEFAULT_MCHBAR | 0x5d10);	// !!! = 0x00000000
+	if ((pcie_read_config16 (SOUTHBRIDGE, 0xa2) & 0xa0) == 0x20	/* 0x0004 */
+	    && reg_5d10) {
+		/* Need reset.  */
+		outb (0x6, 0xcf9);
+
+		/* Spin here after the write to port 0xcf9 above.  */
+		while (1);
+	}
+
+	/* NOTE(review): ctrl is not zero-initialised; only .mobile is set before
+	   the first callee runs - confirm the callees fill in the rest.  */
+	ramctr_timing ctrl;
+
+	dimm_info info;
+
+	ctrl.mobile = mobile;
+
+	/* Get DDR3 SPD data */
+	dram_find_spds_ddr3(spds, &info, &ctrl);
+
+	/* Find fastest common supported parameters */
+	dram_find_common_params(&info, &ctrl);
+
+	/* Calculate timings */
+	dram_timing(&ctrl);
+
+	/* Set MCU frequency */
+	dram_freq(&ctrl);
+
+	/* Set version register */
+	MCHBAR32(0x5034) = 0xC04EB002;
+
+	/* Enable crossover */
+	dram_xover(&ctrl);
+
+	/* Set timing and refresh registers */
+	dram_timing_regs(&ctrl);
+
+	/* Power mode preset */
+	MCHBAR32(0x4e80) = 0x5500;
+
+	/* Set scheduler parameters */
+	MCHBAR32(0x4c20) = 0x10100005;
+
+	/* Set cpu specific register */
+	set_4f8c ();
+
+	/* Clear IO reset bit */
+	MCHBAR32(0x5030) &= ~0x20;
+
+	/* FIXME: Fix dimm map - Set MAD-DIMM registers */
+	dram_dimm_mapping(&info, &ctrl);
+	printram("Done dimm mapping\n");
+
+	/* Zone config */
+	dram_zones(&info, &ctrl, 1);
+
+	/* Set memory map */
+	dram_memorymap(&info, me_uma_size);
+	printram("Done memory map\n");
+
+	/* Set IO registers */
+	dram_ioregs(&ctrl);
+	printram("Done io registers\n");
+
+	udelay (1);
+
+	/* Do jedec ddr3 reset sequence */
+	dram_jedecreset(&ctrl);
+	printram("Done jedec reset\n");
+
+	/* MRS commands */
+	dram_mrscommands(&ctrl);
+	printram("Done MRS commands\n");
+	/* NOTE(review): the MRS commands are issued a second time here -
+	   confirm that this repetition is intentional.  */
+	dram_mrscommands(&ctrl);
+
+	/* Prepare for memory training */
+	prepare_training(&ctrl);
+
+	/* Training steps; the "CP5x" messages mark progress checkpoints.  */
+	read_training (&ctrl);
+	write_training (&ctrl);
+
+	printk (BIOS_ERR, "CP5a\n");
+  
+	discover_edges (&ctrl);
+
+	printk (BIOS_ERR, "CP5b\n");
+
+	command_training(&ctrl);
+
+	printk (BIOS_ERR, "CP5c\n");
+
+	discover_edges_write(&ctrl);
+
+	discover_timC_write(&ctrl);
+
+	normalize_training (&ctrl);
+
+	hardcode1 (&ctrl);
+
+	write_controller_mr(&ctrl);
+
+	channel_test(&ctrl);
+
+	write32 (DEFAULT_MCHBAR | 0x5024, 0x00a030ce);// FIXME: hardcoded
+
+	set_scrambling_seed(&ctrl);
+
+	set_42a0 (&ctrl);
+
+	write32 (DEFAULT_MCHBAR | 0x4cd4, 0x00000046); // FIXME: hardcoded
+
+	/* Set bits 13:12 of 0x400c and 0x440c to 01.  */
+	write32 (DEFAULT_MCHBAR | 0x400c, (read32 (DEFAULT_MCHBAR | 0x400c) & 0xFFFFCFFF) | 0x1000); // OK
+	write32 (DEFAULT_MCHBAR | 0x440c, (read32 (DEFAULT_MCHBAR | 0x440c) & 0xFFFFCFFF) | 0x1000); // OK
+	write32 (DEFAULT_MCHBAR | 0x4cb0, 0x00000740); // FIXME: hardcoded
+	write32 (DEFAULT_MCHBAR | 0x4380, 0x00000aaa); // OK
+	write32 (DEFAULT_MCHBAR | 0x4780, 0x00000aaa); // OK
+	write32 (DEFAULT_MCHBAR | 0x4f88, 0x5f7003ff); // OK
+	write32 (DEFAULT_MCHBAR | 0x5064, 0x00073193); // FIXME: hardcoded
+	write32 (DEFAULT_MCHBAR | 0x4384, 0x009b6ea1);// FIXME: hardcoded
+	write32 (DEFAULT_MCHBAR | 0x4784, 0x009b6ea1);// FIXME: hardcoded
+	write32 (DEFAULT_MCHBAR | 0x5880, 0xca9171e5);// FIXME: hardcoded
+	/* The reads below discard their result; the "!!!" comments give a value. */
+	read32 (DEFAULT_MCHBAR | 0x5888);	// !!! = 0x00e4d5d0
+	write32 (DEFAULT_MCHBAR | 0x5888, 0x00e4d5d0);// FIXME: hardcoded
+	read32 (DEFAULT_MCHBAR | 0x58a8);	// !!! = 0x00000000
+	write32 (DEFAULT_MCHBAR | 0x58a8, 0x00000000);// FIXME: hardcoded
+	read32 (DEFAULT_MCHBAR | 0x4294);	// !!! = 0x000098ff
+	write32 (DEFAULT_MCHBAR | 0x4294, 0x000198ff);// FIXME: hardcoded
+	read32 (DEFAULT_MCHBAR | 0x4694);	// !!! = 0x000098ff
+	write32 (DEFAULT_MCHBAR | 0x4694, 0x000198ff);// FIXME: hardcoded
+
+	MCHBAR32 (0x5030) |= 1; // OK
+	MCHBAR32 (0x5030) |= 0x80; // OK
+	MCHBAR32 (0x5f18) = 0xfa; // OK
+	read32 (DEFAULT_MCHBAR | 0x5d10);	// !!! = 0x00000000
+	write32 (DEFAULT_MCHBAR | 0x5d10, 0x2010040c); // FIXME: hardcoded
+
+	/* Zone config */
+	dram_zones(&info, &ctrl, 0);
+
+#if CONFIG_USBDEBUG_IN_ROMSTAGE
+	/* mrc.bin reconfigures USB, so reinit it to have debug */
+	usbdebug_init();
+#endif
+
+	/* FIXME: uncomment this once dram_memorymap is fixed.  */
+//	intel_early_me_init_done(ME_INIT_STATUS_SUCCESS);
 
-	post_system_agent_init(pei_data);
+	post_system_agent_init();
 	report_memory_config();
 }
diff --git a/src/northbridge/intel/sandybridge/raminit.h b/src/northbridge/intel/sandybridge/raminit.h
index c3b1c2a..7e0b10e 100644
--- a/src/northbridge/intel/sandybridge/raminit.h
+++ b/src/northbridge/intel/sandybridge/raminit.h
@@ -20,17 +20,11 @@
 #ifndef RAMINIT_H
 #define RAMINIT_H
 
-#include "pei_data.h"
+#include <device/dram/ddr3.h>
 
-struct sys_info {
-	u8 boot_path;
-#define BOOT_PATH_NORMAL	0
-#define BOOT_PATH_RESET		1
-#define BOOT_PATH_RESUME	2
-} __attribute__ ((packed));
-
-void sdram_initialize(struct pei_data *pei_data);
-void save_mrc_data(struct pei_data *pei_data);
-int fixup_sandybridge_errata(void);
+/* The order is ch0dimmA, ch0dimmB, ch1dimmA, ch1dimmB.  */
+void init_dram_ddr3(spd_raw_data *spds, int mobile);
+void pch_init(void);
+void read_spd(spd_raw_data *spd, u8 addr);
 
 #endif				/* RAMINIT_H */



More information about the coreboot-gerrit mailing list