GA(Genetic Algorithm)
遗传算法
GA是一种基于自然群体遗传演化机制的高效探索算法,它是美国学者Holland于1975年首先提出来的。
它摒弃了传统的搜索方式,模拟自然界生物进化过程,采用人工进化的方式对目标空间进行随机化搜索。它将问题域中的可能解看作是群体的一个个体或染色体,并将每一个体编码成符号串形式,模拟达尔文的遗传选择和自然淘汰的生物进化过程,对群体反复进行基于遗传学的操作(遗传,交叉和变异),根据预定的目标适应度函数对每个个体进行评价,依据适者生存,优胜劣汰的进化规则,不断得到更优的群体,同时以全局并行搜索方式来搜索优化群体中的最优个体,求得满足要求的最优解。
Holland创建的遗传算法是一种概率搜索算法,它是利用某种编码技术作用于称为染色体的数串,其基本思想是模拟由这些组成的进化过程。跗算法通过有组织地然而是随机地信息交换重新组合那些适应性好的串,在每一代中,利用上一代串结构中适应好的位和段来生成一个新的串的群体;作为额外增添,偶尔也要在串结构中尝试用新的位和段来替代原来的部分。
遗传算法是一类随机化算法,但是它不是简单的随机走动,它可以有效地利用已经有的信息处理来搜索那些有希望改善解质量的串,类似于自然进化,遗传算法通过作用于染色体上的基因,寻找好的染色体来求解问题。与自然界相似,遗传算法对待求解问题本身一无所知,它所需要的仅是对算法所产生的每个染色体进行评价,并基于适应度值来造反染色体,使适用性好的染色体比适应性差的染色体有更多的繁殖机会。
基因:组成染色体的单元,可以表示为一个二进制位,一个整数或一个字符等。
染色体或个体:表示待求解问题的一个可能解,由若干基因组成,是GA操作的基本对象。
群体:一定数量的个体组成了群体,表示GA的遗传搜索空间。
适应度或适度:代表一个个体所对应解的优劣,通常由某一适应度函数表示。
选择:GA的基本操作之一,即根据个体的适应度,在群体中按照一定的概论选择可以作为父本的个体,选择依据是适应度大的个体被选中的概率高。选择操作体现了适者生存,优胜劣汰的进化规则。
交叉:GA的基本操作之一,即将父本个体按照一定的概率随机地交换基因形成新的个体。
变异:GA的基本操作之一,即即按一定概率随机改变某个体的基因值。
// All code copyright (c) 2003 Barry Lapthorn
// Website: http://www.lapthorn.net
//
// Disclaimer:
// All code is provided on an "AS IS" basis, without warranty. The author
// makes no representation, or warranty, either express or implied, with
// respect to the code, its quality, accuracy, or fitness for a specific
// purpose. Therefore, the author shall not have any liability to you or any
// other person or entity with respect to any liability, loss, or damage
// caused or alleged to have been caused directly or indirectly by the code
// provided. This includes, but is not limited to, interruption of service,
// loss of data, loss of profits, or consequential damages from the use of
// this code.
//
//
// $Author: barry $
// $Revision: 1.1 $
//
// $Id: GA.cs,v 1.1 2003/08/19 20:59:05 barry Exp $
// 秒大刀在原文基础上添加了注释,并移植到C#2.0平台
using System;
using System.Collections;
using btl.generic;
namespace btl.generic
{
/// <summary>
/// 基因
/// </summary>
public class Genome
{
/// <summary>
/// 构造基因
/// </summary>
public Genome()
{
}
/// <summary>
/// 构造基因
/// </summary>
/// <param name="length">基因的长度</param>
public Genome(int length)
{
m_length = length;
m_genes = new double[ length ];
CreateGenes();
}
/// <summary>
/// 构造基因
/// </summary>
/// <param name="length">基因的长度</param>
/// <param name="createGenes">是否初始化基因</param>
public Genome(int length, bool createGenes)
{
m_length = length;
m_genes = new double[ length ];
if (createGenes)
CreateGenes();
}
/// <summary>
/// 构造基因
/// </summary>
/// <param name="genes">按照给定的基因值初始化基因</param>
public Genome(ref double[] genes)
{
m_length = genes.GetLength(0);
m_genes = new double[ m_length ];
for (int i = 0 ; i < m_length ; i++)
m_genes[i] = genes[i];
}
/// <summary>
/// 初始化基因组
/// </summary>
private void CreateGenes()
{
// DateTime d = DateTime.UtcNow;
for (int i = 0 ; i < m_length ; i++)
m_genes[i] = m_random.NextDouble();
}
/// <summary>
/// 实现基因的杂交
/// </summary>
/// <param name="genome2">要进行杂交的配偶</param>
/// <param name="child1">[Out]杂交结果1</param>
/// <param name="child2">[Out]杂交结果2</param>
public void Crossover(ref Genome genome2, out Genome child1, out Genome child2)
{
//获得杂交点
int pos = (int)(m_random.NextDouble() * (double)m_length);
child1 = new Genome(m_length, false);
child2 = new Genome(m_length, false);
//从杂交点实现两条基因链的交叉
for(int i = 0 ; i < m_length ; i++)
{
if (i < pos)
{
child1.m_genes[i] = m_genes[i];
child2.m_genes[i] = genome2.m_genes[i];
}
else
{
child1.m_genes[i] = genome2.m_genes[i];
child2.m_genes[i] = m_genes[i];
}
}
}
/// <summary>
/// 实现基因组的一次突变
/// </summary>
public void Mutate()
{
for (int pos = 0 ; pos < m_length; pos++)
{
if (m_random.NextDouble() < m_mutationRate)
m_genes[pos] = (m_genes[pos] + m_random.NextDouble())/2.0;
}
}
/// <summary>
/// 得到基因组
/// </summary>
/// <returns></returns>
public double[] Genes()
{
return m_genes;
}
/// <summary>
/// 向控制台输出基因组
/// </summary>
public void Output()
{
for (int i = 0 ; i < m_length ; i++)
{
System.Console.WriteLine("{0:F4}", m_genes[i]);
}
System.Console.Write("\n");
}
/// <summary>
/// 将基因组存放到给定的参数中
/// </summary>
/// <param name="values">要存放基因组的输出参数,必须保证该参数空间的合法性</param>
public void GetValues(ref double[] values)
{
for (int i = 0 ; i < m_length ; i++)
values[i] = m_genes[i];
}
/// <summary>
/// 基因组
/// </summary>
public double[] m_genes;
/// <summary>
/// 基因的长度
/// </summary>
private int m_length;
/// <summary>
/// 该基因的适合度
/// </summary>
private double m_fitness;
static Random m_random = new Random();
/// <summary>
/// 突变率
/// </summary>
private static double m_mutationRate;
/// <summary>
/// gs基因的适合度
/// </summary>
public double Fitness
{
get
{
return m_fitness;
}
set
{
m_fitness = value;
}
}
/// <summary>
/// gs基因的整体突变率
/// </summary>
public static double MutationRate
{
get
{
return m_mutationRate;
}
set
{
m_mutationRate = value;
}
}
/// <summary>
/// g基因的长度
/// </summary>
public int Length
{
get
{
return m_length;
}
}
}
}
namespace btl.generic
{
/// <summary>
/// Compares genomes by fitness
/// 利用适合度来进行基因的比较
/// </summary>
public sealed class GenomeComparer : IComparer<Genome>
{
#region IComparer<Genome> 成员
public int Compare(Genome x, Genome y)
{
if (x.Fitness > y.Fitness)
return 1;
else if (x.Fitness == y.Fitness)
return 0;
else
return -1;
}
#endregion
}
}
namespace btl.generic
{
public delegate double GAFunction(double[] values);
/// <summary>
/// Genetic Algorithm class
/// </summary>
public class GA
{
#region 构造函数
/// <summary>
/// Default constructor sets mutation rate to 5%, crossover to 80%, population to 100,
/// and generations to 2000.
/// 构造一个遗传群体
/// </summary>
public GA()
{
InitialValues();
m_mutationRate = 0.05;
m_crossoverRate = 0.80;
m_populationSize = 100;
m_generationSize = 2000;
m_strFitness = "";
}
/// <summary>
/// 构造一个遗传群体
/// </summary>
/// <param name="crossoverRate">基因杂交率</param>
/// <param name="mutationRate">基因突变率</param>
/// <param name="populationSize">群体数量</param>
/// <param name="generationSize">遗传代数</param>
/// <param name="genomeSize">基因组大小</param>
public GA(double crossoverRate, double mutationRate, int populationSize, int generationSize, int genomeSize)
{
InitialValues();
m_mutationRate = mutationRate;
m_crossoverRate = crossoverRate;
m_populationSize = populationSize;
m_generationSize = generationSize;
m_genomeSize = genomeSize;
m_strFitness = "";
}
/// <summary>
/// 构造一个遗传群体
/// </summary>
/// <param name="genomeSize"></param>
public GA(int genomeSize)
{
InitialValues();
m_genomeSize = genomeSize;
}
#endregion 构造函数
#region 方法
public void InitialValues()
{
m_elitism = false;
}
/// <summary>
/// Method which starts the GA executing.
/// 启动算法
/// </summary>
public void Go()
{
if (getFitness == null)
throw new ArgumentNullException("Need to supply fitness function");
if (m_genomeSize == 0)
throw new IndexOutOfRangeException("Genome size not set");
// Create the fitness table.
m_fitnessTable = new List<double>();
m_thisGeneration = new List<Genome>(m_generationSize);
m_nextGeneration = new List<Genome>(m_generationSize);
Genome.MutationRate = m_mutationRate;
//创建初始种群
CreateGenomes();
//按照适合度对种群进行排序
RankPopulation();
#region Log
StreamWriter outputFitness = null;
bool write = false;
if (m_strFitness != "")
{
write = true;
outputFitness = new StreamWriter(m_strFitness);
}
#endregion
//按照给定的代数进行遗传
for (int i = 0; i < m_generationSize; i++)
{
//种群进化
CreateNextGeneration();
//按照适合度进行排序
RankPopulation();
#region Log
if (write)
{
if (outputFitness != null)
{
double d = m_thisGeneration[m_populationSize-1].Fitness;
outputFitness.WriteLine("{0},{1}",i,d);
}
}
#endregion
}
#region Log
if (outputFitness != null)
outputFitness.Close();
#endregion
}
/// <summary>
/// After ranking all the genomes by fitness, use a 'roulette wheel' selection
/// method. This allocates a large probability of selection to those with the
/// highest fitness.
/// 轮盘选择,会以概率的方式选择适合度更高的个体
/// [其实只是从中随机的选择一个个体]
/// </summary>
/// <returns>Random individual biased towards highest fitness 偏向更高适合度的一个随机个体</returns>
private int RouletteSelection()
{
double randomFitness = m_random.NextDouble() * m_totalFitness;
int index = m_fitnessTable.BinarySearch(randomFitness);
if (index >= 0)
{
return index;
}
else
{
index = ~index;
if (index == m_fitnessTable.Count)
return m_fitnessTable.Count - 1;
else
return index;
}
#region Old BinarySearch
//int idx = -1;
//int mid;
//int first = 0;
//int last = m_populationSize -1;
//mid = (last - first)/2;
// ArrayList's BinarySearch is for exact values only
// so do this by hand.
//while (idx == -1 && first <= last)
//{
// if (randomFitness < (double)m_fitnessTable[mid])
// {
// last = mid;
// }
// else if (randomFitness > (double)m_fitnessTable[mid])
// {
// first = mid;
// }
// mid = (first + last)/2;
// // lies between i and i+1
// if ((last - first) == 1)
// idx = last;
//}
//return idx;
#endregion
}
/// <summary>
/// Rank population and sort in order of fitness.
/// 按照适合度对种群进行排序
/// </summary>
private void RankPopulation()
{
//计算每组基因的适合度和该代的总体适合度
m_totalFitness = 0;
for (int i = 0; i < m_populationSize; i++)
{
Genome g = m_thisGeneration[i];
g.Fitness = FitnessFunction(g.Genes());
m_totalFitness += g.Fitness;
}
//将种群按照适合度的大小进行排序
m_thisGeneration.Sort(new GenomeComparer());
double fitness = 0.0;
m_fitnessTable.Clear();
for (int i = 0; i < m_populationSize; i++)
{
fitness += m_thisGeneration[i].Fitness;
m_fitnessTable.Add((double)fitness);
}
}
/// <summary>
/// Create the *initial* genomes by repeated calling the supplied fitness function
/// 创建一个初始种群
/// </summary>
private void CreateGenomes()
{
for (int i = 0; i < m_populationSize ; i++)
{
Genome g = new Genome(m_genomeSize);
m_thisGeneration.Add(g);
}
}
/// <summary>
/// 将整个群体向前进化一代
/// </summary>
private void CreateNextGeneration()
{
m_nextGeneration.Clear();
Genome bestGene = null;
if (m_elitism)
bestGene = m_thisGeneration[m_populationSize - 1];
for (int i = 0 ; i < m_populationSize ; i+=2)
{
int pidx1 = RouletteSelection();
int pidx2 = RouletteSelection();
Genome parent1, parent2, child1, child2;
parent1 = m_thisGeneration[pidx1];
parent2 = m_thisGeneration[pidx2];
//以一定的概率进行杂交
if (m_random.NextDouble() < m_crossoverRate)
{
parent1.Crossover(ref parent2, out child1, out child2);
}
else
{
child1 = parent1;
child2 = parent2;
}
//进行基因突变
child1.Mutate();
child2.Mutate();
//将新的个体加入到下一代
m_nextGeneration.Add(child1);
m_nextGeneration.Add(child2);
}
if (m_elitism && bestGene != null)
m_nextGeneration[0] = bestGene;
m_thisGeneration.Clear();
m_thisGeneration.AddRange(m_nextGeneration);
//for (int i = 0 ; i < m_populationSize; i++)
// m_thisGeneration.Add(m_nextGeneration[i]);
}
public void GetBest(out double[] values, out double fitness)
{
Genome g = ((Genome)m_thisGeneration[m_populationSize-1]);
values = new double[g.Length];
g.GetValues(ref values);
fitness = (double)g.Fitness;
}
public void GetWorst(out double[] values, out double fitness)
{
GetNthGenome(0, out values, out fitness);
}
public void GetNthGenome(int n, out double[] values, out double fitness)
{
if (n < 0 || n > m_populationSize-1)
throw new ArgumentOutOfRangeException("n too large, or too small");
Genome g = ((Genome)m_thisGeneration[n]);
values = new double[g.Length];
g.GetValues(ref values);
fitness = (double)g.Fitness;
}
#endregion
#region 字段
/// <summary>
/// 种群突变率
/// </summary>
private double m_mutationRate;
/// <summary>
/// 种群杂交率
/// </summary>
private double m_crossoverRate;
/// <summary>
/// 种群大小
/// </summary>
private int m_populationSize;
/// <summary>
/// 遗传代数
/// </summary>
private int m_generationSize;
/// <summary>
/// 基因长度
/// </summary>
private int m_genomeSize;
/// <summary>
/// 总体适合度
/// </summary>
private double m_totalFitness;
/// <summary>
/// 适合度
/// </summary>
private string m_strFitness;
/// <summary>
/// 是否保留群体中的最优者(最优者将不参加下一轮的生存竞争)
/// </summary>
private bool m_elitism;
/// <summary>
/// 当前代
/// </summary>
private List<Genome> m_thisGeneration;
/// <summary>
/// 下一代
/// </summary>
private List<Genome> m_nextGeneration;
/// <summary>
/// 适应性累加表
/// </summary>
private List<double> m_fitnessTable;
/// <summary>
/// 所要用到的随机数发生器
/// </summary>
static Random m_random = new Random();
/// <summary>
/// 评价函数
/// </summary>
static private GAFunction getFitness;
#endregion
#region 属性
/// <summary>
/// gs算法的评价函数
/// </summary>
public GAFunction FitnessFunction
{
get
{
return getFitness;
}
set
{
getFitness = value;
}
}
/// <summary>
/// gs种群的大小
/// </summary>
public int PopulationSize
{
get
{
return m_populationSize;
}
set
{
m_populationSize = value;
}
}
/// <summary>
/// gs种群的代数
/// </summary>
public int Generations
{
get
{
return m_generationSize;
}
set
{
m_generationSize = value;
}
}
/// <summary>
/// gs种群的基因长度
/// </summary>
public int GenomeSize
{
get
{
return m_genomeSize;
}
set
{
m_genomeSize = value;
}
}
/// <summary>
/// gs种群的杂交率
/// </summary>
public double CrossoverRate
{
get
{
return m_crossoverRate;
}
set
{
m_crossoverRate = value;
}
}
/// <summary>
/// gs突变率
/// </summary>
public double MutationRate
{
get
{
return m_mutationRate;
}
set
{
m_mutationRate = value;
}
}
/// <summary>
/// gs适应性
/// </summary>
public string FitnessFile
{
get
{
return m_strFitness;
}
set
{
m_strFitness = value;
}
}
/// <summary>
/// Keep previous generation's fittest individual in place of worst in current
/// </summary>
public bool Elitism
{
get
{
return m_elitism;
}
set
{
m_elitism = value;
}
}
#endregion
}
}
/// <summary>
///
/// </summary>
/// <remarks>
/// 种群大小一般可取80~320;
/// 交叉率一般取0.7~0.9;
/// 变异率0.05~0.15就可以了,一般比较小的时候稳定,也容易找到最佳值,但是有时候找不到最佳
/// </remarks>
public class Test
{
// optimal solution for this is (0.5,0.5)
public static double theActualFunction(double[] values)
{
if (values.GetLength(0) != 2)
throw new ArgumentOutOfRangeException("should only have 2 args");
double x = values[0];
double y = values[1];
double n = 9; // should be an int, but I don't want to waste time casting.
double f1 = Math.Pow(15 * x * y * (1 - x) * (1 - y) * Math.Sin(n * Math.PI * x) * Math.Sin(n * Math.PI * y), 2);
return f1;
}
public static void Test1()
{
// Crossover = 80%
// Mutation = 5%
// Population size = 100
// Generations = 2000
// Genome size = 2
GA ga = new GA(0.8, 0.05, 100, 2000, 2);
ga.FitnessFile = "fitness1.txt";
ga.FitnessFunction = new GAFunction(theActualFunction);
ga.Elitism = true;
ga.Go();
double[] values;
double fitness;
ga.GetBest(out values, out fitness);
System.Console.WriteLine("Best ({0}):", fitness);
for (int i = 0; i < values.Length; i++)
System.Console.WriteLine("{0} ", values[i]);
ga.GetWorst(out values, out fitness);
System.Console.WriteLine("\nWorst ({0}):", fitness);
for (int i = 0; i < values.Length; i++)
System.Console.WriteLine("{0} ", values[i]);
}
public static void Main()
{
//Test1();
Test2();
}
public static void Test2()
{
GA ga = new GA(0.8, 0.05, 100, 1000, 10);
ga.FitnessFile = "fitness2.txt";
ga.FitnessFunction = new GAFunction(ToAverage);
ga.Elitism = true;
ga.Go();
double[] values;
double fitness;
ga.GetBest(out values, out fitness);
System.Console.WriteLine("Best ({0}):", fitness);
for (int i = 0; i < values.Length; i++)
System.Console.WriteLine("{0} ", values[i]);
ga.GetWorst(out values, out fitness);
System.Console.WriteLine("\nWorst ({0}):", fitness);
for (int i = 0; i < values.Length; i++)
System.Console.WriteLine("{0} ", values[i]);
}
public static double ToAverage(double[] values)
{
if (values == null || values.Length == 0)
throw new ArgumentException();
double sum = 0.0;
for (int i = 0; i < values.Length; i++)
{
sum += values[i];
}
double ave = sum / values.Length;
sum = 0.0;
for (int i = 0; i < values.Length; i++)
{
sum += Math.Abs(values[i] - ave);
}
return values.Length - sum;
}
}
评论