Example: classifying curricula for a job site
On a job site you need to classify curricula (CVs) to show to logged-in users. You could extract a huge quantity of data from each one, but let's extract just a few features:
- whether the candidate has any certifications
- whether the person is talkative
- whether (s)he has joined a golf club
- whether (s)he has earned a master's degree
/**
 * One sample of the training set.
 *
 * @param hasCertifications whether the person holds certifications
 * @param isTalkative       whether the person is talkative
 * @param golfClub          whether the person joined a golf club
 * @param hasMasterDegree   whether the person earned a master degree
 * @param job               the job, used as the class label
 */
case class Person(hasCertifications : Boolean,
                  isTalkative : Boolean,
                  golfClub : Boolean,
                  hasMasterDegree : Boolean,
                  job : String)
// Training set: each Person pairs four boolean traits with the job used as the label.
val persons = List(
  Person(hasCertifications = true,  isTalkative = false, golfClub = false, hasMasterDegree = true,  job = "Programmer"),
  Person(hasCertifications = false, isTalkative = false, golfClub = false, hasMasterDegree = true,  job = "Junior Programmer"),
  Person(hasCertifications = true,  isTalkative = false, golfClub = false, hasMasterDegree = false, job = "Programmer"),
  Person(hasCertifications = false, isTalkative = true,  golfClub = false, hasMasterDegree = true,  job = "Seller"),
  Person(hasCertifications = false, isTalkative = true,  golfClub = false, hasMasterDegree = false, job = "Seller"),
  Person(hasCertifications = true,  isTalkative = true,  golfClub = false, hasMasterDegree = false, job = "Seller"),
  Person(hasCertifications = false, isTalkative = true,  golfClub = true,  hasMasterDegree = true,  job = "CEO"),
  Person(hasCertifications = false, isTalkative = false, golfClub = true,  hasMasterDegree = false, job = "CEO"),
  Person(hasCertifications = false, isTalkative = false, golfClub = true,  hasMasterDegree = false, job = "CEO")
)
In the list persons I put my training set. To use the ID3 algorithm, just import the right object and use the list for training:
import org.scalarecog.decisiontree._
def toVector(p : Person) = Vector(p.hasCertifications, p.isTalkative, p.golfClub, p.hasMasterDegree)
val dataset = persons map (p => (toVector(p), p.job))
val tree = new ID3[Boolean,String] buildTree dataset
The reason for toVector is that the ID3 class needs a Vector.
Now tree can classify a person:
val newPerson = Person(false, false, false, false, "?")
assert( tree.classify(toVector(newPerson)) == "Junior Programmer" )
But it would be nice to see the decision tree created by ID3. With JGraph it's straightforward, and I get this:
Hey, it's the real life! ^_^
Here is the full code:
package scalarecoggraph
import org.scalarecog.decisiontree._
import javax.swing.JFrame
import com.mxgraph.swing.mxGraphComponent
import com.mxgraph.view.mxGraph
/**
 * Swing frame that renders a decision tree as an mxGraph diagram.
 *
 * The graph is built once, during construction, by calling [[draw]].
 *
 * @param tree          the decision tree to display
 * @param propertyNames labels for branch nodes; a branch is labelled with
 *                      `propertyNames(index)`, where `index` is the branch's own index
 */
class Program(tree : DecisionTree[Vector[Boolean], String], propertyNames : Vector[String]) extends JFrame("ScalaRecog") {

  type Tree = DecisionTree[Vector[Boolean], String]

  /** A graph cell paired with the (x, y) position at which it was inserted. */
  type Vertex = (AnyRef, (Double, Double))

  draw()

  /** Builds the graph for `tree` and adds the resulting component to the content pane. */
  def draw(): Unit = {
    val graph: mxGraph = new mxGraph
    val root = graph.getDefaultParent

    // Width and height used for every vertex.
    val vertexWidth = 100
    val vertexHeight = 30

    /**
     * Inserts the vertex for `t` at `parentPos + offset`, then recursively inserts
     * its children and the edges leading to them.
     *
     * @return the vertex created for `t`
     */
    def drawSubtree(t : Tree, parentPos : (Double, Double), offset : (Int, Int)) : Vertex = {

      // Creates a vertex and runs `action` on it before returning it, so children
      // can be attached while the freshly created vertex is in scope.
      def createVertex(label : String, action : Vertex => Unit = v => {}) : Vertex = {
        val newPos = (parentPos._1 + offset._1, parentPos._2 + offset._2)
        val cell = graph.insertVertex(root, null, label, newPos._1, newPos._2, vertexWidth, vertexHeight)
        val created = (cell, newPos)
        action(created)
        created
      }

      def createEdge(label : String, from : Vertex, to : Vertex) =
        graph.insertEdge(root, null, label, from._1, to._1)

      t match {
        case leaf : DecisionLeaf[Vector[Boolean],String] =>
          createVertex(leaf.label)
        case branch : DecisionBranchVector[String,Boolean] =>
          createVertex(propertyNames(branch.index), node => {
            // Children keep the vertical offset and are spread horizontally, 120 units apart.
            for ( ((label, child), index) <- branch.branches.zipWithIndex )
              createEdge(label.toString, node, drawSubtree(child, node._2, (120*index, offset._2)))
          })
      }
    }

    // Wrap all insertions in a single model update; endUpdate always runs.
    graph.getModel.beginUpdate()
    try {
      drawSubtree(tree, (0.0, 0.0), (120, 120))
    }
    finally {
      graph.getModel.endUpdate()
    }

    getContentPane.add(new mxGraphComponent(graph))
  }
}
/** Entry point: trains an ID3 tree on a small sample of people and shows it in a frame. */
object Program {

  /** One training sample: four boolean traits plus the job used as the class label. */
  case class Person(hasCertifications : Boolean,
                    isTalkative : Boolean,
                    golfClub : Boolean,
                    hasMasterDegree : Boolean,
                    job : String)

  /** Builds the tree, checks one classification, then opens the window. */
  def main(args : Array[String]) : Unit = {
    // Training set.
    val persons = List(
      Person(hasCertifications = true,  isTalkative = false, golfClub = false, hasMasterDegree = true,  job = "Programmer"),
      Person(hasCertifications = false, isTalkative = false, golfClub = false, hasMasterDegree = true,  job = "Junior Programmer"),
      Person(hasCertifications = true,  isTalkative = false, golfClub = false, hasMasterDegree = false, job = "Programmer"),
      Person(hasCertifications = false, isTalkative = true,  golfClub = false, hasMasterDegree = true,  job = "Seller"),
      Person(hasCertifications = false, isTalkative = true,  golfClub = false, hasMasterDegree = false, job = "Seller"),
      Person(hasCertifications = true,  isTalkative = true,  golfClub = false, hasMasterDegree = false, job = "Seller"),
      Person(hasCertifications = false, isTalkative = true,  golfClub = true,  hasMasterDegree = true,  job = "CEO"),
      Person(hasCertifications = false, isTalkative = false, golfClub = true,  hasMasterDegree = false, job = "CEO"),
      Person(hasCertifications = false, isTalkative = false, golfClub = true,  hasMasterDegree = false, job = "CEO")
    )

    // Feature vector handed to ID3; the element order matches the labels passed to the frame below.
    def toVector(p : Person) =
      Vector(p.hasCertifications, p.isTalkative, p.golfClub, p.hasMasterDegree)

    val dataset = persons.map(p => (toVector(p), p.job))
    val tree = (new ID3[Boolean,String]).buildTree(dataset)

    // Sanity check: a person with no traits set is classified as "Junior Programmer".
    val newPerson = Person(hasCertifications = false, isTalkative = false,
                           golfClub = false, hasMasterDegree = false, job = "?")
    assert(tree.classify(toVector(newPerson)) == "Junior Programmer")

    val propertyNames = Vector("Has certifications?", "Is talkative?", "Likes playing golf?", "Has a master degree?")
    val frame = new Program(tree, propertyNames)
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE)
    frame.setSize(400, 320)
    frame.setVisible(true)
  }
}
Enjoy!
