#!/usr/bin/python
# C&C NLP tools
# Copyright (c) Universities of Edinburgh, Oxford and Sydney
# Copyright (c) James R. Curran
#
# This software is covered by a non-commercial use licence.
# See LICENCE.txt for the full text of the licence.
#
# If LICENCE.txt is not included in this distribution
# please email candc@it.usyd.edu.au to obtain a copy.

import sys

SEP = '|'

def usage(s):
  """Report a command-line error and terminate the script.

  Writes the message `s` on one line, followed by the usage synopsis,
  to standard error and then exits with status 1.  Never returns.
  """
  # Plain stream writes instead of the `print >> stream` statement: the
  # bytes written are the same on Python 2, and the code also runs on
  # Python 3 (where the chevron form fails at runtime).
  sys.stderr.write("%s\n" % (s,))
  sys.stderr.write("usage: extract_tagged [-t|-s|-w] <pipe_file>\n")
  sys.exit(1)

# Exactly two arguments are expected after the script name: the mode flag
# and the path of the input file.
if len(sys.argv) != 3:
  usage("incorrect number of arguments")

# Output mode flag; it selects which fields of each token are emitted.
MODE = sys.argv[1]

if MODE not in ('-t', '-s', '-w'):
  usage("mode must be -t, -s or -w")

# Header recording the command line that produced this output, followed by
# a blank line.  Written with sys.stdout.write rather than the print
# statement so the same bytes are produced on Python 2 and the code is
# also valid on Python 3.
sys.stdout.write("# this file was generated by the following command(s):\n")
sys.stdout.write("# %s\n" % (' '.join(sys.argv)))
sys.stdout.write("\n")

# Read the input file and print one sentence per output line.
#   * an empty input line ends the current sentence and prints it;
#   * a line starting with '(<L ' contributes one token to the sentence;
#   * every other line is ignored.
# Per token, -s emits fields 3, 2 and 1 joined by SEP, -t emits fields 3
# and 2, and -w emits field 3 only (presumably word, POS tag and supertag
# -- confirm against the pipe-file format).
sentence = []
infile = open(sys.argv[2])
try:
  for line in infile:
    # Drop the line terminator once, so CRLF input and a final line with
    # no trailing newline are treated like ordinary '\n'-terminated lines.
    text = line.rstrip('\r\n')
    if not text:
      sys.stdout.write(' '.join(sentence) + '\n')
      sentence = []
    elif text.startswith('(<L '):
      # SEP joins the output fields, so it must not occur in the input.
      if SEP in text:
        sys.stderr.write("separator character '%s' found in input line '%s'\n" % (SEP, text))
        sys.exit(1)
      # Strip the leading '(<L ' and the single closing character, then
      # split the remainder on single spaces.
      fields = text[4:-1].split(' ')
      if len(fields) < 4:
        # Fail with a readable message instead of an IndexError traceback.
        sys.stderr.write("malformed input line '%s'\n" % (text,))
        sys.exit(1)
      if MODE == '-s':
        sentence.append(SEP.join((fields[3], fields[2], fields[1])))
      elif MODE == '-t':
        sentence.append(SEP.join((fields[3], fields[2])))
      elif MODE == '-w':
        sentence.append(fields[3])
      else:
        # MODE is validated at start-up, so this is an internal error.
        # A real exception class is required: string exceptions are not
        # supported by Python 2.6 and later.
        raise AssertionError("should not get in here")
finally:
  # Runs on normal completion and on the sys.exit() paths above.
  infile.close()

# Flush a final sentence that was not terminated by a blank line.
if sentence:
  sys.stdout.write(' '.join(sentence) + '\n')
