* percentages of the lru and ageable caches. This should balance the seeks
* generated by these structures.
*
- * If the vm encounted mapped pages on the LRU it increase the pressure on
+ * If the vm encountered mapped pages on the LRU it increase the pressure on
* slab to avoid swapping.
*
* We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
long mapped_ratio;
long distress;
long swap_tendency;
+ long imbalance;
if (zone_is_near_oom(zone))
goto force_reclaim_mapped;
*/
swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
+ /*
+ * If there's huge imbalance between active and inactive
+ * (think active 100 times larger than inactive) we should
+ * become more permissive, or the system will take too much
+ * cpu before it start swapping during memory pressure.
+ * Distress is about avoiding early-oom, this is about
+ * making swappiness graceful despite setting it to low
+ * values.
+ *
+ * Avoid div by zero with nr_inactive+1, and max resulting
+ * value is vm_total_pages.
+ */
+ imbalance = zone_page_state(zone, NR_ACTIVE);
+ imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
+
+ /*
+ * Reduce the effect of imbalance if swappiness is low,
+ * this means for a swappiness very low, the imbalance
+ * must be much higher than 100 for this logic to make
+ * the difference.
+ *
+ * Max temporary value is vm_total_pages*100.
+ */
+ imbalance *= (vm_swappiness + 1);
+ imbalance /= 100;
+
+ /*
+ * If not much of the ram is mapped, makes the imbalance
+ * less relevant, it's high priority we refill the inactive
+ * list with mapped pages only in presence of high ratio of
+ * mapped pages.
+ *
+ * Max temporary value is vm_total_pages*100.
+ */
+ imbalance *= mapped_ratio;
+ imbalance /= 100;
+
+ /* apply imbalance feedback to swap_tendency */
+ swap_tendency += imbalance;
+
/*
* Now use this metric to decide whether to start moving mapped
* memory onto the inactive list.
unsigned long nr_to_scan;
unsigned long nr_reclaimed = 0;
- atomic_inc(&zone->reclaim_in_progress);
-
/*
* Add one to `nr_to_scan' just to make sure that the kernel will
* slowly sift through the active list.
}
throttle_vm_writeout(sc->gfp_mask);
-
- atomic_dec(&zone->reclaim_in_progress);
return nr_reclaimed;
}
note_zone_scanning_priority(zone, priority);
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone_is_all_unreclaimable(zone) && priority != DEF_PRIORITY)
continue; /* Let kswapd poll it */
sc->all_unreclaimable = 0;
*/
if (priority < 0)
priority = 0;
- for (i = 0; zones[i] != 0; i++) {
+ for (i = 0; zones[i] != NULL; i++) {
struct zone *zone = zones[i];
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone_is_all_unreclaimable(zone) &&
+ priority != DEF_PRIORITY)
continue;
if (!zone_watermark_ok(zone, order, zone->pages_high,
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone_is_all_unreclaimable(zone) &&
+ priority != DEF_PRIORITY)
continue;
if (!zone_watermark_ok(zone, order, zone->pages_high,
temp_priority[i] = priority;
sc.nr_scanned = 0;
note_zone_scanning_priority(zone, priority);
- nr_reclaimed += shrink_zone(priority, zone, &sc);
+ /*
+ * We put equal pressure on every zone, unless one
+ * zone has way too many pages free already.
+ */
+ if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
+ end_zone, 0))
+ nr_reclaimed += shrink_zone(priority, zone, &sc);
reclaim_state->reclaimed_slab = 0;
nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
lru_pages);
nr_reclaimed += reclaim_state->reclaimed_slab;
total_scanned += sc.nr_scanned;
- if (zone->all_unreclaimable)
+ if (zone_is_all_unreclaimable(zone))
continue;
if (nr_slab == 0 && zone->pages_scanned >=
(zone_page_state(zone, NR_ACTIVE)
+ zone_page_state(zone, NR_INACTIVE)) * 6)
- zone->all_unreclaimable = 1;
+ zone_set_flag(zone,
+ ZONE_ALL_UNRECLAIMABLE);
/*
* If we've done a decent amount of scanning and
* the reclaim ratio is low, start doing writepage
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && prio != DEF_PRIORITY)
+ if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
continue;
/* For pass = 0 we don't shrink the active list */
{
pg_data_t *pgdat;
cpumask_t mask;
+ int nid;
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
- for_each_online_pgdat(pgdat) {
+ for_each_node_state(nid, N_HIGH_MEMORY) {
+ pgdat = NODE_DATA(nid);
mask = node_to_cpumask(pgdat->node_id);
if (any_online_cpu(mask) != NR_CPUS)
/* One of our CPUs online: restore mask */
int nid;
swap_setup();
- for_each_online_node(nid)
+ for_each_node_state(nid, N_HIGH_MEMORY)
kswapd_run(nid);
hotcpu_notifier(cpu_callback, 0);
return 0;
int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
{
- cpumask_t mask;
int node_id;
+ int ret;
/*
* Zone reclaim reclaims unmapped file backed pages and
<= zone->min_slab_pages)
return 0;
+ if (zone_is_all_unreclaimable(zone))
+ return 0;
+
/*
- * Avoid concurrent zone reclaims, do not reclaim in a zone that does
- * not have reclaimable pages and if we should not delay the allocation
- * then do not scan.
+ * Do not scan if the allocation should not be delayed.
*/
- if (!(gfp_mask & __GFP_WAIT) ||
- zone->all_unreclaimable ||
- atomic_read(&zone->reclaim_in_progress) > 0 ||
- (current->flags & PF_MEMALLOC))
+ if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
return 0;
/*
* as wide as possible.
*/
node_id = zone_to_nid(zone);
- mask = node_to_cpumask(node_id);
- if (!cpus_empty(mask) && node_id != numa_node_id())
+ if (node_state(node_id, N_CPU) && node_id != numa_node_id())
+ return 0;
+
+ if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
return 0;
- return __zone_reclaim(zone, gfp_mask, order);
+ ret = __zone_reclaim(zone, gfp_mask, order);
+ zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
+
+ return ret;
}
#endif