diff --git a/ex.py b/ex.py
index 41a69b6..4355683 100644
--- a/ex.py
+++ b/ex.py
@@ -29,7 +29,7 @@ initial_cycles = prgm.perf()
 
 print("Program to optimize:")
 prgm.display()
-print(f"Cost: {initial_cost}")
+print(f"Cost: {initial_cost[0]}")
 print(f"Cycles: {initial_cycles}")
 
 start_time = time()
@@ -51,7 +51,7 @@ optimized_prgm.display()
 optimized_cost = cost(prgm, test_cases, outputs, optimized_prgm)
 optimized_cycles = optimized_prgm.perf()
 
-print(f"Cost: {optimized_cost}")
+print(f"Cost: {optimized_cost[0]}")
 print(f"Cycles: {optimized_cycles}")
 
 print(f"Took {round(end_time - start_time, 3)} seconds")
\ No newline at end of file
diff --git a/gbso/optimize.py b/gbso/optimize.py
index 881e472..45392ed 100644
--- a/gbso/optimize.py
+++ b/gbso/optimize.py
@@ -19,15 +19,17 @@ DEFAULT_PROB_INSN_UNUSED = 0.1
 
 
 def cost(orig_prgm, test_cases, outputs, prgm) -> Tuple[int, bool]:
-    c = prgm.perf() - orig_prgm.perf()
-    eq = c == 0
-    # print(f"init cost: {c}")
+    # Since each instruction executes in 4*k cycles (for some k), this can have
+    # the undesirable effect of performance improvements being weighted much
+    # higher than correctness. This hurts convergence pretty badly, so we scale
+    # by 1/4 to compensate.
+    perf = (prgm.perf() - orig_prgm.perf()) / 4.0
+    eq = 0
 
     for test_case in test_cases:
-        c += eq_on_testcase(orig_prgm, prgm, test_case, outputs)
-        # print(f"cost after testcase: {c}")
+        eq += eq_on_testcase(orig_prgm, prgm, test_case, outputs)
 
-    return c, eq
+    return perf + eq, eq == 0
 
 
 def optimize(
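
To make the rebalancing concrete, here is a minimal standalone sketch of the weighting problem the new comment in cost() describes. The cycle counts and the one-point-per-failing-test-case penalty below are assumptions for illustration, not values taken from gbso; eq_on_testcase is assumed to return a small nonnegative penalty when a candidate's outputs differ from the original program's.

# Hypothetical illustration of the 1/4 scaling; the numbers are made up,
# not taken from gbso itself.

def scaled_cost(perf_delta_cycles: int, failing_testcases: int) -> float:
    # perf_delta_cycles plays the role of prgm.perf() - orig_prgm.perf().
    # It only moves in steps of 4 because each instruction takes 4*k cycles,
    # so dividing by 4 keeps a one-instruction speedup comparable in size
    # to a one-test-case correctness penalty.
    return perf_delta_cycles / 4.0 + failing_testcases

# Dropping one 4-cycle instruction while breaking one test case is now
# roughly cost-neutral instead of looking like a clear win.
print(scaled_cost(-4, 1))  # 0.0 with scaling
print(-4 + 1)              # -3 without scaling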