Example: classifying curricula for a job site
On a job site you need to classify curricula to show to the logged-in users. You could extract a huge quantity of data from each curriculum, but let's extract just a few features:
- whether the employee has some certifications
- whether the person is talkative
- whether (s)he joined a golf club
- whether (s)he earned a master's degree
/** One curriculum: four boolean features plus the job used as the class label. */
case class Person(
  hasCertifications : Boolean,
  isTalkative : Boolean,
  golfClub : Boolean,
  hasMasterDegree : Boolean,
  job : String
)

// Training set: nine labelled examples.
val persons = List(
  Person(hasCertifications = true,  isTalkative = false, golfClub = false, hasMasterDegree = true,  job = "Programmer"),
  Person(hasCertifications = false, isTalkative = false, golfClub = false, hasMasterDegree = true,  job = "Junior Programmer"),
  Person(hasCertifications = true,  isTalkative = false, golfClub = false, hasMasterDegree = false, job = "Programmer"),
  Person(hasCertifications = false, isTalkative = true,  golfClub = false, hasMasterDegree = true,  job = "Seller"),
  Person(hasCertifications = false, isTalkative = true,  golfClub = false, hasMasterDegree = false, job = "Seller"),
  Person(hasCertifications = true,  isTalkative = true,  golfClub = false, hasMasterDegree = false, job = "Seller"),
  Person(hasCertifications = false, isTalkative = true,  golfClub = true,  hasMasterDegree = true,  job = "CEO"),
  Person(hasCertifications = false, isTalkative = false, golfClub = true,  hasMasterDegree = false, job = "CEO"),
  Person(hasCertifications = false, isTalkative = false, golfClub = true,  hasMasterDegree = false, job = "CEO")
)

// In the list `persons` I put my training set. To use the ID3 algorithm, just
// import the right object and use the list for training:
import org.scalarecog.decisiontree._

// Turns a Person into its feature vector, in the order:
// certifications, talkative, golf club, master degree.
def toVector(p : Person) =
  Vector(p.hasCertifications, p.isTalkative, p.golfClub, p.hasMasterDegree)

// Pair every feature vector with its label (the job).
val dataset = persons.map(person => (toVector(person), person.job))

// Train the decision tree on the labelled dataset.
val tree = (new ID3[Boolean, String]).buildTree(dataset)

// The reason for `toVector` is that the ID3 class needs a Vector.
Now tree can classify a person:
// A person with every feature set to false and an unknown job.
val newPerson = Person(
  hasCertifications = false,
  isTalkative = false,
  golfClub = false,
  hasMasterDegree = false,
  job = "?"
)

// The trained tree is expected to label this person a junior programmer.
assert(tree.classify(toVector(newPerson)) == "Junior Programmer")

// But it would be nice to see the decision tree created by ID3. With JGraph
// it's straightforward, and I get this:
// Hey, it's real life! ^_^
Here is the full code:
package scalarecoggraph

import org.scalarecog.decisiontree._
import javax.swing.JFrame
import com.mxgraph.swing.mxGraphComponent
import com.mxgraph.view.mxGraph

/** A window titled "ScalaRecog" that draws a decision tree as a JGraph graph.
  *
  * Branch nodes are labelled with the name of the property they test, leaves
  * with their classification label, and edges with the branch value.
  *
  * @param tree          the decision tree to display
  * @param propertyNames display name for each feature, looked up by the index
  *                      stored in a branch node
  */
class Program(tree : DecisionTree[Vector[Boolean], String], propertyNames : Vector[String])
    extends JFrame("ScalaRecog") {

  type Tree = DecisionTree[Vector[Boolean], String]

  /** A graph cell handle together with the (x, y) position it was placed at. */
  type Vertex = (AnyRef, (Double, Double))

  // The graph is built once, while the frame is being constructed.
  draw()

  /** Builds the graph for `tree` and adds it to the frame's content pane. */
  def draw(): Unit = {
    val graph: mxGraph = new mxGraph
    val root = graph.getDefaultParent

    /** Inserts the vertex for `t` at `parentPos + offset`, then recursively
      * inserts its children and the edges leading to them.
      *
      * @return the vertex created for `t`
      */
    def drawSubtree(t : Tree, parentPos : (Double, Double), offset : (Int, Int)) : Vertex = {

      // Creates the vertex, then runs `action` on it (used to attach children).
      def createVertex(label : String, action : Vertex => Unit = v => {}) : Vertex = {
        val vertexSize = (100, 30)
        val newPos = (parentPos._1 + offset._1, parentPos._2 + offset._2)
        val cell = graph.insertVertex(root, null, label, newPos._1, newPos._2, vertexSize._1, vertexSize._2)
        val created = (cell, newPos)
        action(created)
        created
      }

      def createEdge(label : String, from : Vertex, to : Vertex) =
        graph.insertEdge(root, null, label, from._1, to._1)

      t match {
        case a : DecisionLeaf[Vector[Boolean],String] =>
          createVertex(a.label)
        case a : DecisionBranchVector[String,Boolean] =>
          createVertex(propertyNames(a.index), n => {
            // Children are spread horizontally by 120 per branch index and keep
            // the same vertical step as their parent.
            for ( ((label, child), index) <- a.branches.zipWithIndex )
              createEdge(label.toString, n, drawSubtree(child, n._2, (120*index, offset._2)))
          })
      }
    }

    // Batch all insertions into a single model update; always close it.
    graph.getModel.beginUpdate()
    try {
      drawSubtree(tree, (0.0, 0.0), (120, 120))
    } finally {
      graph.getModel.endUpdate()
    }

    getContentPane.add(new mxGraphComponent(graph))
  }
}

object Program {

  /** One curriculum: four boolean features plus the job used as the class label. */
  case class Person(
    hasCertifications : Boolean,
    isTalkative : Boolean,
    golfClub : Boolean,
    hasMasterDegree : Boolean,
    job : String
  )

  /** Trains an ID3 tree on a small hard-coded training set, checks one
    * prediction, and shows the resulting tree in a window.
    */
  def main(args : Array[String]) : Unit = {
    // Training set.
    val persons = List(
      Person(hasCertifications = true,  isTalkative = false, golfClub = false, hasMasterDegree = true,  job = "Programmer"),
      Person(hasCertifications = false, isTalkative = false, golfClub = false, hasMasterDegree = true,  job = "Junior Programmer"),
      Person(hasCertifications = true,  isTalkative = false, golfClub = false, hasMasterDegree = false, job = "Programmer"),
      Person(hasCertifications = false, isTalkative = true,  golfClub = false, hasMasterDegree = true,  job = "Seller"),
      Person(hasCertifications = false, isTalkative = true,  golfClub = false, hasMasterDegree = false, job = "Seller"),
      Person(hasCertifications = true,  isTalkative = true,  golfClub = false, hasMasterDegree = false, job = "Seller"),
      Person(hasCertifications = false, isTalkative = true,  golfClub = true,  hasMasterDegree = true,  job = "CEO"),
      Person(hasCertifications = false, isTalkative = false, golfClub = true,  hasMasterDegree = false, job = "CEO"),
      Person(hasCertifications = false, isTalkative = false, golfClub = true,  hasMasterDegree = false, job = "CEO")
    )

    // The ID3 class needs a Vector of features; the order here must match the
    // property names passed to the frame below.
    def toVector(p : Person) =
      Vector(p.hasCertifications, p.isTalkative, p.golfClub, p.hasMasterDegree)

    val dataset = persons map (p => (toVector(p), p.job))
    val tree = (new ID3[Boolean, String]).buildTree(dataset)

    val newPerson = Person(
      hasCertifications = false,
      isTalkative = false,
      golfClub = false,
      hasMasterDegree = false,
      job = "?"
    )
    assert( tree.classify(toVector(newPerson)) == "Junior Programmer" )

    // Swing components should be created and shown on the event dispatch thread.
    javax.swing.SwingUtilities.invokeLater(new Runnable {
      def run(): Unit = {
        val frame = new Program(tree, Vector("Has certifications?", "Is talkative?", "Likes playing golf?", "Has a master degree?"))
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE)
        frame.setSize(400, 320)
        frame.setVisible(true)
      }
    })
  }
}

// Enjoy!