object termwiki2xml extends App {
import java.io.{File,FileOutputStream}
import java.nio.channels.Channels
import scala.xml._
import scala.collection.mutable.{HashMap,Queue,Set}
/** One expression of a term in a particular language.
  *
  * @param exp  the expression text
  * @param lang the language code the expression is written in
  */
case class Entry(exp: String, lang: String)

/** A term page together with the expressions collected for it.
  *
  * @param id        value of the term's `id` attribute
  * @param title     term title
  * @param namespace numeric namespace the term belongs to
  * @param entries   expressions attached to the term
  */
case class Term(id: String, title: String, namespace: Int, entries: Seq[Entry])
/** Parses the XML file named by the first command-line argument.
  *
  * @return the root element of the parsed document
  * @throws IllegalArgumentException if no input file was given on the command line
  */
def termwiki():Elem = {
  // Fail with a usage hint instead of a bare ArrayIndexOutOfBoundsException.
  require(args.nonEmpty, "usage: termwiki2xml <input.xml>")
  XML.loadFile(args(0))
}
/** Splits the text of `elem` into template blocks and converts each one with `entry2xml`.
  *
  * The text is read line by line:
  *  - a line starting with `{{` opens a template; the rest of the line is its name,
  *  - a line starting with `|` adds a `key=value` field to the open template,
  *  - a line starting with `}}` closes the open template,
  *  - a line starting with `}}{{` closes the open template and opens the next one.
  * All other lines are ignored.
  *
  * @param elem node whose text content holds the template markup
  * @return one node per closed template, in the order they were closed
  */
def wt2xml(elem:Node):Queue[Node] = {
  val nodes = Queue[Node]()
  val entry = HashMap.empty[String, String]

  // Emits the template collected so far and resets the accumulator.
  // A closing marker without a preceding opening line has no "Name", which
  // entry2xml requires, so such stray markers are skipped instead of crashing.
  def flush(): Unit = {
    if (entry.contains("Name")) nodes += entry2xml(entry)
    // Start the next template from a clean slate so that no field of this
    // template (not even an empty-valued one) leaks into the next.
    entry.clear()
  }

  elem.text.split("\n").foreach { line =>
    if (line.startsWith("{{")) {
      entry("Name") = line.substring(2)
    }
    else if (line.startsWith("}}{{")) {
      flush()
      entry("Name") = line.substring(4)
    }
    else if (line.startsWith("|")) {
      val eq = line.indexOf('=')
      // A field line without '=' has no key/value split; ignore it rather
      // than failing in substring with a negative end index.
      if (eq >= 0) entry(line.substring(1, eq)) = line.substring(eq + 1)
    }
    else if (line.startsWith("}}")) {
      flush()
    }
  }
  nodes
}
// Converts one collected template (a map of field name -> value) into XML.
// The "Name" key is removed from `map` (the caller's map is mutated) and used
// as the template title; `.get` throws NoSuchElementException when "Name" is
// absent. Every remaining key/value pair is passed to createNode.
// NOTE(review): both branches return `{nodes}`, a Queue[Node], although the
// declared result type is Node. The enclosing XML element literals appear to
// have been lost from this copy of the file - restore them from the original
// source before relying on this method.
def entry2xml(map:HashMap[String, String]):Node = {
var nodes = Queue[Node]()
val title = map.remove("Name").get
//println(title)
// One child node per remaining field of the template.
map.foreach{ case(k,v) =>
nodes += createNode(k,v)
}
// val language = (nodes \ "lang").text
// val expression = (nodes \ "expression").text
// if (language == "" || expression == "" ) {
// println("Empty node")
//
// }
// Templates named "Concept" are logged with a "Concept: " prefix; everything
// else is logged as-is.
if (title == "Concept") {
println("Concept: " + nodes)
return {nodes}
}
else {
println(nodes)
return {nodes}
}
}
// Maps one template field to a node, dispatching on the field name `arg`:
// "definition", "language" and "expression" use the field value `v`, any
// other field name falls through to the default case, which uses `arg` itself.
// NOTE(review): every case currently evaluates to a bare String in a block,
// which does not match the declared Node result type. The XML element
// literals that wrapped these values appear to be missing from this copy -
// confirm the intended tag names against the original source.
def createNode(arg: String, v: String): Node = arg match {
case "definition" => {v}
case "language" => {v}
case "expression" => {v}
case _ => {arg}
}
/** Pretty-prints `node` (line width 80, indent 2) into `fileName` as UTF-8.
  *
  * The file is opened before the node is formatted, and the writer is closed
  * even when formatting or writing fails.
  *
  * @param node     XML node to serialise
  * @param fileName path of the file to write
  * @return `fileName`, unchanged
  */
def save(node: Node, fileName: String) = {
  val printer = new PrettyPrinter(80, 2)
  val out = Channels.newWriter(new FileOutputStream(fileName).getChannel(), "UTF-8")
  try {
    // out.write("\n")
    out.write(printer.format(node))
  }
  finally out.close()
  fileName
}
// Parsed input document and all of its <page> descendants.
val xml = termwiki
val page = xml \\ "page"
// Accumulators filled as a side effect of the traversals below (Set here is
// scala.collection.mutable.Set, per the imports):
//   colls        - every namespace number seen on a page
//   langs        - every language value seen while building `terms`
//   null_related - text of every "null" descendant seen while building `terms`
var colls = Set[Int]()
var langs = Set[String]()
var null_related = Set[String]()
// Converts each page: records its namespace in `colls` and runs wt2xml over
// the last revision/text element.
// `namespace.toInt` throws NumberFormatException when <ns> is missing or not
// numeric, and `.last` throws when a page has no revision/text element.
// NOTE(review): `id` and `title` are computed but unused, and the closure
// result is a bare `{wt2xml(categories)}` while later code queries
// `wikit \\ "Term"` with @id/@title/@ns attributes. The wrapping XML element
// literal appears to be missing from this copy - confirm against the original.
val wikit = (page).map { item =>
val id = item \ "id"
val namespace = (item \ "ns").text
val title = (item \ "title").text
colls += namespace.toInt
val categories = (item \ "revision" \ "text").last
{wt2xml(categories)}
}
// Builds one Term per "Term" element found in `wikit`.
// Side effects: every language value is added to `langs` and the text of every
// "null" descendant is added to `null_related`, including for Related elements
// that end up being dropped.
val terms = (wikit \\ "Term" ).map { entry =>
  val id = (entry \ "@id").text
  // The title is the part of the title attribute after the first ':'.
  // When there is no such part, the id is printed and the title stays "".
  val title = (entry \ "@title").text.split(':').lift(1).getOrElse {
    println(id)
    ""
  }
  // Throws NumberFormatException when the ns attribute is missing or not numeric.
  val namespace = (entry \ "@ns").text.toInt
  // Only Related elements become entries. Elements lacking an expression or a
  // language are dropped here instead of being stored as null placeholders.
  val entries = (entry \\ "Related").flatMap { item =>
    val exp = (item \\ "expression").text
    val lang = (item \\ "lang").text
    null_related += (item \\ "null").text
    langs += lang
    if (exp != "" && lang != "") Some(Entry(exp, lang)) else None
  }
  Term(id, title, namespace, entries)
}
// Builds the "termcenter" document for namespace `ns` (it is written to
// termcenter.xml by the output loop at the end of the file).
// Walks every term whose namespace equals `ns` and every non-null entry of it.
// NOTE(review): the branch for non-null entries is empty and the body is a
// plain `terms.map`, which does not match the declared Elem result type. The
// XML element literals appear to be missing from this copy of the file -
// restore them from the original source.
def termCenter(ns: Int):Elem =
{terms.map { term =>
if (term.namespace == ns)
{
{ term.entries.map { entry =>
if (entry != null) {
}
}}
}
}}
// Builds the per-language document for language `lang` in namespace `ns` (it
// is written to terms-<lang>.xml by the output loop at the end of the file).
// For every term in namespace `ns`, the non-null entries whose language equals
// `lang` contribute their expression text.
// NOTE(review): the body is a plain `terms.map` yielding `{entry.exp}`, which
// does not match the declared Elem result type. The XML element literals
// appear to be missing from this copy of the file - restore them from the
// original source.
def termsLang(lang: String, ns: Int):Elem =
{terms.map { term =>
if (term.namespace == ns)
{ term.entries.map { entry =>
if (entry != null && entry.lang == lang) {
{entry.exp}
}
}}
}}
// Builds the metadata document for the collection `id` (it is written to
// meta.xml by the output loop at the end of the file).
// NOTE(review): the lines below look like the text content of an XML literal
// whose tags were lost ("termwiki", the id twice, "numeric", "yes"). As
// written they are not the intended Elem expression - restore the markup from
// the original source.
def meta(id: String):Elem =
termwiki
{id}
numeric
{id}
yes
// val langs = List(terms foreach { term =>
// term.entries foreach { entry =>
// entry.lang
// }
// })
// Debug output: text of all "null" descendants collected while building `terms`.
println(null_related)
// Write one output directory per namespace: meta.xml, termcenter.xml and one
// terms-<lang>.xml for every language that was seen.
colls.foreach { coll =>
  val dirName = "terms/termwiki-" + coll
  val folder = new File(dirName)
  // mkdirs (not mkdir) so that the parent "terms" directory is created as
  // well; report a failed creation here instead of letting the first save()
  // fail with a less obvious FileNotFoundException.
  if (!folder.isDirectory() && !folder.mkdirs()) {
    sys.error(s"Could not create output directory $dirName")
  }
  save(meta("termwiki-" + coll), dirName + "/meta.xml")
  save(termCenter(coll), dirName + "/termcenter.xml")
  langs.foreach(lang => save(termsLang(lang, coll), dirName + "/terms-" + lang + ".xml"))
}
}