#!/bin/env perl
# C&C NLP tools
# Copyright (c) Universities of Edinburgh, Oxford and Sydney
# Copyright (c) James R. Curran
#
# This software is covered by a non-commercial use licence.
# See LICENCE.txt for the full text of the licence.
#
# If LICENCE.txt is not included in this distribution
# please email candc@it.usyd.edu.au to obtain a copy.

# Both <cutoff> and <types> are mandatory; whatever follows them on the
# command line is left in @ARGV as the list of feature files.
die "usage: count_features <cutoff> <types> <features...>\n"
  unless @ARGV >= 2;

# Count threshold and feature-type pattern, taken from @ARGV in that order.
($CUTOFF, $TYPES) = splice(@ARGV, 0, 2);

# Number of occurrences seen for each distinct feature line.
%features = ();

# Provenance header: a banner line followed by this invocation.  At this
# point @ARGV holds only the arguments that came after <types>.
$command_line = join('',
  "# this file was generated by the following command(s):\n",
  "# $0 $CUTOFF \"$TYPES\" @ARGV\n",
);

# Consume the preface: the leading run of "# " comment lines, ended by the
# first empty line.  Each comment is appended to $command_line so the output
# carries forward the commands recorded in its input; banner lines from an
# earlier run are skipped rather than repeated.
# Dies if a non-empty line that does not start with "# " appears before the
# terminating empty line.
while(<>){
    last if(/^$/);

    if(/^\# /){
	next if(/^\# this file .*generated by the following command/);
	$command_line .= $_;
    }else{
	chomp;
	# '%' is numeric modulus in Perl, not a string-formatting operator, so
	# the offending line is interpolated directly into the message.
	die "unrecognised preface comment line '$_'\n";
    }
}

# Terminate the header with an empty line, then emit it on standard output.
$command_line .= "\n";
print $command_line;

# Tally the remaining input: every non-empty line, with its trailing newline
# removed, is used verbatim as a key of %features.
while (my $feature = <>) {
  chomp $feature;
  next if $feature eq '';

  $features{$feature} += 1;
}

# Write "<count> <feature>" for every feature that was seen more than $CUTOFF
# times and whose text starts with the $TYPES pattern followed by a space.
# The lines are piped through "sort -rn" so the highest counts come first.
#
# The low-precedence 'or' is essential: with '||' the die attaches to the
# (always true) command string, and a failed open is silently ignored.
open(my $sorter, '|-', 'sort', '-rn')
  or die "could not open pipe to sort: $!\n";

while (my ($key, $val) = each(%features)){
  # $TYPES is interpolated into the pattern unquoted, so it is matched as a
  # regular expression rather than as a literal string.
  print {$sorter} "$val $key\n" if($val > $CUTOFF and $key =~ /^$TYPES /o);
}

# Closing a pipe waits for the child, so this is where a write error or a
# non-zero exit from sort becomes visible.
close($sorter)
  or die($! ? "error closing pipe to sort: $!\n"
            : "sort failed (wait status $?)\n");
