package shef.nlp.supple;
/**
 * Title: SUPPLE
 *
 * Copyright: Copyright (c) 2003-2006
 *
 * @version 1.0
 */
//gate stuff
import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.DocumentContent;
import gate.Factory;
import gate.FeatureMap;
import gate.ProcessingResource;
import gate.Resource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.gui.STreeNode;
import gate.util.InvalidOffsetException;
import gate.util.OffsetComparator;
import gate.util.SimpleFeatureMapImpl;
import gate.util.Files;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import shef.nlp.supple.category.Chart;
import shef.nlp.supple.prolog.Prolog;
import shef.nlp.supple.utils.BestParseOutput;
import shef.nlp.supple.utils.SemOutput;
import shef.nlp.supple.utils.SynOutput;
import shef.nlp.supple.utils.SynSemTriple;
public class SUPPLE extends AbstractLanguageAnalyser implements ProcessingResource, Serializable
{
/** Name of the temp file prefix for the input buchart **/
protected static String InTempFileName = "SUPPLE--IN--";
/** Name of the temp file prefix for the output from buchart **/
protected static String OutTempFileName = "SUPPLE--OUT--";
/** Name of the temp file prefix for the semantic output from buchart **/
protected static String SemTempFileName = "SUPPLE--SEM--";
public File InTempFile, OutTempFile, SemTempFile;
/** The name of the executable BuChart **/
private URL suppleFileUrl;
private File suppleFile;
public void setSUPPLEFile(URL suppleFile) { suppleFileUrl = suppleFile; }
public URL getSUPPLEFile() { return suppleFileUrl; }
/*
protected String syntaxSetName;
public void setSyntaxSetName(String n) { syntaxSetName = n; }
public String getSyntaxSetName() { return syntaxSetName; }
*/
/** Name of the annotation set where the semantics of each parsed chunk are stored. */
protected String semanticsSetName;

/** Sets the name of the annotation set used for chunk semantics. */
public void setSemanticsSetName(String n)
{
  semanticsSetName = n;
}

/** Returns the name of the annotation set used for chunk semantics. */
public String getSemanticsSetName()
{
  return semanticsSetName;
}
/** The document under analysis. */
protected Document document;

/** Returns the document currently being processed. */
public Document getDocument()
{
  return document;
}

/** Sets the document to process. */
public void setDocument(Document doc)
{
  document = doc;
}
/** Location of the parser configuration file. */
public URL configFile;

/** Sets the location of the parser configuration file. */
public void setConfigFile(URL configFile)
{
  this.configFile = configFile;
}

/** Returns the location of the parser configuration file. */
public URL getConfigFile()
{
  return configFile;
}
/** Location of the feature table file. */
public URL featureFile;

/** Sets the location of the feature table file. */
public void setFeatureFile(URL featureFile)
{
  this.featureFile = featureFile;
}

/** Returns the location of the feature table file. */
public URL getFeatureFile()
{
  return featureFile;
}
/** When true, only the longest chunk is passed on to the chart parser. */
public Boolean longestMatch;

/** Returns whether only the longest match is passed to the chart parser. */
public Boolean getLongestMatch()
{
  return longestMatch;
}

/** Sets whether only the longest match is passed to the chart parser. */
public void setLongestMatch(Boolean m)
{
  longestMatch = m;
}
/** Handle on the concrete Prolog implementation in use. */
private Prolog prolog;

/** Name identifying which Prolog implementation to use. */
private String prologImpl;

/** Returns the name of the configured Prolog implementation. */
public String getPrologImplementation()
{
  return prologImpl;
}

/** Sets the name of the Prolog implementation to use. */
public void setPrologImplementation(String prologImpl)
{
  this.prologImpl = prologImpl;
}
/** Debug flag. */
private Boolean debug;

/** Returns the debug flag. */
public Boolean getDebug()
{
  return debug;
}

/** Sets the debug flag. */
public void setDebug(Boolean debug)
{
  this.debug = debug;
}
/** Gate specification **/
// Parallel lists describing the GATE side of the GATE-to-SUPPLE mapping rules;
// entries at the same index belong to one rule.
// NOTE(review): raw (ungenerified) collections -- the element types are only
// evident from the casts at the use sites (FeatureMap for the two lists below).
public ArrayList gateAnnotations;
public ArrayList gateConstraints;
public ArrayList gateVariables;
/** Gate annotation sets and annotations to consider */
public Hashtable annotationSetTable;
/** Buchart specification **/
// Parallel lists describing the SUPPLE/buchart side of the same mapping rules.
private ArrayList buchartConstraints;
private ArrayList buchartVariables;
/** show mapping Gate-Buchart **/
// NOTE(review): this region of the file is corrupted -- several lines have lost
// text starting at a '<' character (most likely an HTML-escaping accident), so
// loop headers and code from more than one method have been fused together.
// The code below is preserved byte-for-byte; restore it from the upstream
// SUPPLE plugin sources before attempting any behavioural change.
public void showMapping()
{
// NOTE(review): 'for(int i=0;i=0)' is not valid Java -- the loop condition and
// everything up to a later 'if(index>=0)' test appears to be missing here.
for(int i=0;i=0)
{
/* instantiate variables and consider defaults for 'string' and 'text' */
FeatureMap fmvar=(FeatureMap)gateVariables.get(index);
Iterator ite=fmvar.keySet().iterator();
String feature, var, val;
variables=Factory.newFeatureMap();
while(ite.hasNext())
{
feature=(String) ite.next();
var=(String)fmvar.get(feature);
// If the annotation carries the feature, use its value; otherwise fall back
// to the defaults: "body" for 'text', the covered document text for 'string',
// and the anonymous variable "_" for anything else.
if(fm.containsKey(feature))
{
val=(String)fm.get(feature);
}
else
{
if(feature.compareTo("text")==0)
{
val="body";
}
else if(feature.compareTo("string")==0)
{
val=stringContent.substring(annStart.intValue(),annEnd.intValue());
}
else
{
val = "_";
}
}
variables.put(var,val);
}
// Translate the instantiated variables into the buchart-side feature names.
FeatureMap fmbuchartvars=(FeatureMap)buchartVariables.get(index);
ite=fmbuchartvars.keySet().iterator();
/* create a feature map for the mapping */
FeatureMap buchartMap=Factory.newFeatureMap();
while(ite.hasNext())
{
feature=(String) ite.next();
var=(String)fmbuchartvars.get(feature);
if(variables.containsKey(var))
{
buchartMap.put(feature,(String) variables.get(var));
}
else
{
buchartMap.put(feature,"_");
}
}
// Merge in the fixed features for this rule, then record the resulting
// SUPPLE category over the annotation's span.
buchartMap.putAll((FeatureMap)buchartConstraints.get(index));
record=new SUPPLERecord((String) buchartMap.get("category"),annStart,annEnd,buchartMap);
buchartList.add(record);
}
else
{
// throw new ExecutionException("Restriction not found for annotation type " + type);
}
}
/* sort the list of buchart categories by offset */
Collections.sort(buchartList,SUPPLERecord.SUPPLERecordComparator());
/* organise by offset and longest match */
if(getLongestMatch().booleanValue()) {
/* keeps one element per valid start offset */
workingList=keepLongest(buchartList,priorityList);
}
else
{
workingList=buchartList;
}
/* list buchart format of each */
String cat;
ArrayList outFeatures;
// NOTE(review): another truncation -- 'for(int c=0;c0)' fuses a loop header
// with code from a different method (apparently a keepLongest variant that
// works on (set name, annotation) Object[] pairs).
for(int c=0;c0)
{
previous=(Object[])list.get(0);
preann=(Annotation) previous[1];
prestart=preann.getStartNode().getOffset();
preend=preann.getEndNode().getOffset();
presize=preend.longValue()-prestart.longValue();
working.add(previous);
// NOTE(review): truncated -- 'for(int e=1;epresize)' has lost the loop
// condition and the size-comparison logic in between.
for(int e=1;epresize)
{
working.remove(working.size()-1);
working.add(pair);
}
}
else if(start.compareTo(preend)>0)
{
working.add(pair);
}
// Refresh the cached span of the current tail of the working list.
previous=(Object[])working.get(working.size()-1);
preann=(Annotation) previous[1];
prestart=preann.getStartNode().getOffset();
preend=preann.getEndNode().getOffset();
presize=preend.longValue()-prestart.longValue();
}
}
return working;
}
/* the list contains pairs (set name,annotation) and is sorted by annotation offset in ascending order */
// Keeps, per start offset, only the longest SUPPLERecord (ties apparently broken
// by the category priorities table); throws when a category has no priority
// entry.  NOTE(review): two lines below are corrupted by '<'-truncation (see
// the inline markers) -- preserved byte-for-byte.
public static ArrayList keepLongest(ArrayList list,Hashtable priorities) throws ExecutionException
{
ArrayList working=new ArrayList();
SUPPLERecord current;
SUPPLERecord previous;
long presize;
long cursize;
Long prestart, preend, start, end;
int index=0;
int prepriority;
int curpriority;
String auxCat;
if(list.size()>0)
{
// Seed the working list with the first record, caching its priority and span.
previous=(SUPPLERecord)list.get(0);
auxCat=(String)previous.getCategory();
if(priorities.containsKey(auxCat))
{
prepriority = ( (Integer) priorities.get(previous.getCategory())).intValue();
}
else
{
throw new ExecutionException(auxCat + " not found in feature table");
}
prestart=previous.getStart();
preend=previous.getEnd();
presize=preend.longValue()-prestart.longValue();
working.add(previous);
// NOTE(review): corrupted -- 'for(int e=1;epresize)' has lost the loop
// condition plus the code that computes 'current'/'cursize' before comparing.
for(int e=1;epresize)
{
// A longer record (presumably at the same start offset) replaces the
// previous winner at the tail of the working list.
working.remove(working.size()-1);
working.add(current);
}
else if(cursize==presize)
{
// NOTE(review): corrupted -- 'if(curpriority=0)' has lost the comparison
// against 'prepriority' (and possibly surrounding code).
if(curpriority=0)
{
working.add(current);
}
// Refresh the cached priority and span from the tail of the working list.
previous=(SUPPLERecord)working.get(working.size()-1);
prepriority=((Integer) priorities.get(previous.getCategory())).intValue();
prestart=previous.getStart();
preend=previous.getEnd();
presize=preend.longValue()-prestart.longValue();
}
}
return working;
}
// Looks up the index of the mapping rule that matches this annotation (by set
// name, type and constraint features), returning -1 when none applies.
// NOTE(review): heavily corrupted below -- '<'-truncation has fused this method
// with best-parse tree/semantics construction code from other methods.  All
// code is preserved byte-for-byte; restore from upstream before editing.
public static int getIndex(String annotationSet,Annotation annotation, ArrayList annotations, ArrayList constraints)
{
int index=-1;
String type=annotation.getType();
FeatureMap fm=annotation.getFeatures();
// System.out.println(" >>" + annotationSet + "<<" );
FeatureMap annfm;
FeatureMap consfm;
String auxType;
String auxSet;
// NOTE(review): 'for(int i=0;i=0;b--)' is truncated; everything after it
// belongs to a reverse walk over a best-parse list (SynOutput elements),
// not to getIndex.
for(int i=0;i=0;b--)
{
aux=(SynOutput)bestParse.get(b);
categ=aux.getCategory();
consti=(new Integer(aux.getConstituens())).intValue();
if(consti==0)
{
/* to the stack */
// Leaf constituent: push '( cat "covered text" )' onto the front of the stack.
try
{
stack.add(0, "( " + categ + " \"" +
dc.getContent(new Long(aux.getStart()),
new Long(aux.getEnd())) + "\"" +
" )");
}
catch(InvalidOffsetException ioe)
{
ioe.printStackTrace();
}
// dc.getContent(new Long(aux.getStart()),new Long(aux.getEnd()));
}
else
{
/* create (cat (c1) (c2) .... (cn)) and put it into the stack */
String element="";
// NOTE(review): truncated -- 'for(int c=0;c0)' fuses a loop header with a
// later 'if(...>0)' guard from annotation-creation code.
for(int c=0;c0)
{
fm=Factory.newFeatureMap();
try
{
id=docAnnotations.add(start,end,"SyntaxTreeNode",fm);
node= new STreeNode(docAnnotations.get(id));
node.setAllowsChildren(true);
}
catch(InvalidOffsetException ioe)
{
ioe.printStackTrace();
}
// NOTE(review): truncated -- 'for(int i=0; i1)' lost its loop condition.
for(int i=0; i1)
{
//if(category.compareTo("sem_cat")==0) {
/* is a semantic category, simulate a 'tree' structure */
ArrayList neComponents=new ArrayList();
/* get all tokens spanning the start and end */
ArrayList auxList=new ArrayList(auxTokens);
Annotation auxToken;
String tokenCat;
FeatureMap tokenfm;
Long startToken,endToken;
Integer id1;
fm=Factory.newFeatureMap();
father=new ArrayList();
// A node whose father id is 0 is the root and gets an empty 'father' list.
if(!(yourFather.compareTo(new Integer(0))==0))
{
father.add(yourFather);
}
else
{
/* root */
}
fm.put("father",father);
try
{
id=docAnnotations.add(start,end,"SyntaxTreeNode",fm);
node = new STreeNode(docAnnotations.get(id));
node.setAllowsChildren(true);
}
catch(InvalidOffsetException ioe)
{
ioe.printStackTrace();
}
STreeNode node1;
// NOTE(review): truncated -- 'for(int h=0;h0)' fuses a loop header with the
// guard of what looks like a separate tree-writing entry point.
for(int h=0;h0)
{
// System.out.println("creating the annotations");
SynTreeBack(synOut,theAnnotationSet,new Integer(0));
}
/* for tokens without STreeNode we should create one */
Annotation auxToken;
Long tokenStart,tokenEnd;
AnnotationSet auxSynSet;
FeatureMap auxfm;
String tokenCat;
// NOTE(review): the lines from here on duplicate the preceding fragment almost
// verbatim -- likely the same code pasted twice by the corruption.
for(int i=0;i0)
{
// System.out.println("creating the annotations");
SynTreeBack(synOut,theAnnotationSet,new Integer(0));
}
/* for tokens without STreeNode we should create one */
Annotation auxToken;
Long tokenStart,tokenEnd;
AnnotationSet auxSynSet;
FeatureMap auxfm;
String tokenCat;
for(int i=0;i