diff --git a/MHGTagger/CRFTagger.py b/MHGTagger/CRFTagger.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b738e2d3ee536f1beb1bcdef2f29902965f58e5
--- /dev/null
+++ b/MHGTagger/CRFTagger.py
@@ -0,0 +1,110 @@
+
+import sys
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from .RNNTagger import RNNTagger
+
+
+### auxiliary functions ############################################
+
+def logsumexp(x, dim):
+    """ sums up log-scale values along dimension dim (numerically stable) """
+    # Delegate to the built-in implementation: it applies the same max-offset
+    # trick, but returns -inf/+inf instead of NaN when the maximum along dim
+    # is infinite (the manual version computed inf - inf in that case).
+    return torch.logsumexp(x, dim=dim)
+
+def lookup(T, indices):
+    """ select the entries of a vector, matrix, or 3D tensor T at the given indices of its last dimension """
+    rank = T.dim()
+    if rank == 1:
+        return T[indices]
+    if rank in (2, 3):
+        # gather along the last axis; the indices need a trailing singleton axis
+        last = rank - 1
+        return T.gather(last, indices.unsqueeze(last)).squeeze(last)
+    raise Exception('unexpected tensor size in function "lookup"')
+
+    
+### tagger class ###############################################
+
+class CRFTagger(nn.Module):
+    """ CRF tagger: combines the tag scores of an RNNTagger with learned tag-pair weights """
+    
+    def __init__(self, num_chars, num_tags, char_emb_size,
+                 char_rec_size, word_rec_size, word_rnn_depth, 
+                 dropout_rate, word_emb_size, beam_size):
+        """ beam_size is the number of tags considered per word; the other arguments are passed to RNNTagger """
+        super(CRFTagger, self).__init__()
+
+        # base tagger (RNNTagger) which computes the tag scores
+        self.base_tagger = RNNTagger(num_chars, num_tags, char_emb_size,
+                                     char_rec_size, word_rec_size,
+                                     word_rnn_depth, dropout_rate, word_emb_size)
+        self.beam_size = beam_size if 0 < beam_size < num_tags else num_tags  # out-of-range values select all tags
+        self.weights = nn.Parameter(torch.zeros(num_tags, num_tags))  # tag-pair weights [previous tag, tag]
+        self.dropout = nn.Dropout(dropout_rate)  # NOTE(review): not used in forward()
+
+        
+    def forward(self, fwd_charIDs, bwd_charIDs, tags=None):
+        """ returns the predicted tag IDs and, if tags are given, also the negative log-probability of tags """
+        annotation_mode = (tags is None)
+        # tag scores of the base tagger (indexed below as [word position, tag])
+        scores = self.base_tagger(fwd_charIDs, bwd_charIDs)
+        
+        # extract the beam_size highest-scoring tags for each word and their scores
+        best_scores, best_tags = scores.topk(self.beam_size, dim=-1)
+
+        if self.training:  # not done during dev evaluation; requires tags
+            # check whether the goldstandard tags are among the best tags
+            gs_contained = (best_tags == tags.unsqueeze(1)).sum(dim=-1)
+
+            # replace the tag with the lowest score at each position
+            # by the gs tag if the gs tag is not in the list
+            last_column = gs_contained * best_tags[:,-1] + (1-gs_contained) * tags
+            s = lookup(scores, last_column)
+            best_tags   = torch.cat((best_tags[:,:-1], last_column.unsqueeze(1)), dim=1)
+            best_scores = torch.cat((best_scores[:,:-1], s.unsqueeze(1)), dim=1)
+
+        best_previous = []  # stores the backpointers of the Viterbi algorithm
+        viterbi_scores = best_scores[0]
+        if not annotation_mode:
+            forward_scores = best_scores[0]
+        for i in range(1,scores.size(0)):   # for all word positions except the first
+            # tag-pair weights between the beam entries at position i-1 (rows) and i (columns)
+            w = self.weights[best_tags[i-1]][:,best_tags[i]]
+            
+            # Viterbi algorithm: maximize over the previous beam entry (dim 0)
+            values = viterbi_scores.unsqueeze(1) + best_scores[i].unsqueeze(0) + w
+            viterbi_scores, best_prev = torch.max(values, dim=0)
+            best_previous.append(best_prev)
+            
+            # Forward algorithm: same recursion with a log-sum instead of the maximum
+            if not annotation_mode:
+                values = forward_scores.unsqueeze(1) + best_scores[i].unsqueeze(0) + w
+                forward_scores = logsumexp(values, dim=0)
+
+        # Viterbi algorithm: start at the best final beam entry and follow the backpointers
+        _, index = torch.max(viterbi_scores, dim=0)
+        best_indices = [index]
+        for i in range(len(best_previous)-1, -1, -1):
+            index = best_previous[i][index]
+            best_indices.append(index)
+
+        # reverse the indices and map them to tag IDs
+        best_indices = torch.stack(best_indices[::-1])
+        predicted_tags = lookup(best_tags, best_indices)
+
+        if annotation_mode:
+            return predicted_tags
+        else:
+            # loss computation; logZ only covers the tag sequences inside the beam
+            basetagger_scores = lookup(scores, tags).sum()
+            CRFweights = self.weights[tags[:-1], tags[1:]].sum() if tags.size(0)>1 else 0
+            logZ = logsumexp(forward_scores, dim=0)  # log partition function
+            logprob = basetagger_scores + CRFweights - logZ
+            
+            return predicted_tags, -logprob
+        
diff --git a/MHGTagger/Data.py b/MHGTagger/Data.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d692895b83db756cd5cd1aff94742b834511630
--- /dev/null
+++ b/MHGTagger/Data.py
@@ -0,0 +1,186 @@
+
+import sys
+from collections import Counter, OrderedDict 
+import pickle
+import numpy
+
+unk_string   = '<UNK>'
+pad_string   = '<PAD>'
+
+def read_tagged_sentences(path, max_sent_len):
+   """
+   Read a dataset: one "token<TAB>tag" pair per line, an empty line ends a sentence.
+   Returns the (words, tags) pairs of all sentences shorter than max_sent_len.
+   """
+   sentences, words, tags = [], [], []
+   with open(path) as file:
+      # the extra '' flushes a final sentence that lacks a trailing empty line
+      for line in [raw.rstrip() for raw in file] + ['']:
+         if line:
+            word, tag, *_ = line.split("\t")
+            words.append(word)
+            tags.append(tag)
+         else:
+            # empty line marking the end of a sentence
+            if 0 < len(words) < max_sent_len:
+               sentences.append((words, tags))
+            words, tags = [], []
+   return sentences
+
+
+def read_word_embeddings(filename):
+   # Read (word, vector) pairs from a text file with one word and its
+   # space-separated vector components per line; also return the vector size.
+   if filename is None:
+      return [], 0
+   print("reading word embeddings ...", file=sys.stderr)
+   with open(filename) as file:
+      rows = (line.rstrip().split(' ') for line in file)
+      word_embeddings = [(word, numpy.array(vec, dtype=numpy.float32))
+                         for word, *vec in rows if word != unk_string]  # skip the unknown-word entry
+   print("done", file=sys.stderr)
+   word_emb_size = len(word_embeddings[0][1]) if word_embeddings else 0
+   return word_embeddings, word_emb_size
+            
+
+def make_dict(counter, min_freq=0, add_pad_symbol=False):
+   """
+   Map every string of counter with a frequency of at least min_freq to a number.
+   The unknown symbol comes first (ID 0), followed by the padding symbol (ID 1) if
+   requested, which is OK because pack_padded_sequence is not used.
+   """
+   specials = [unk_string, pad_string] if add_pad_symbol else [unk_string]
+   symlist = specials + [elem for elem, freq in counter.most_common() if freq >= min_freq]
+   string2ID = {elem: i for i, elem in enumerate(symlist)}
+   return string2ID, symlist
+
+
+class Data(object):
+   """
+   class for reading a tagged training and development corpus or a test corpus
+   """
+   
+   IGNORE_INDEX = -100  # ID assigned to ignore_tag in init_train
+
+   def __init__(self, *args):
+      """ one argument: parameter file (see init_test); otherwise: arguments of init_train """
+      if len(args) == 1:
+         self.init_test(*args)
+      else:
+         self.init_train(*args)
+
+   ### functions needed during training ###############################################
+
+   def init_train(self, path_train, path_dev, word_trunc_len,
+                  min_char_freq, max_sent_len, word_embeddings, ignore_tag):
+      """ read the training and development data and create the symbol tables """
+      self.word_trunc_len = word_trunc_len  # length to which words are truncated or filled up
+
+      # reading the datasets
+      self.train_sentences = read_tagged_sentences(path_train, max_sent_len)
+      self.dev_sentences   = read_tagged_sentences(path_dev, max_sent_len)
+   
+      ### create dictionaries which map characters or tags to IDs
+      char_counter = Counter()
+      tag_counter  = Counter()
+      for words, tags in self.train_sentences:
+         tag_counter.update(tags)
+         for word in words:
+            char_counter.update(word)
+      self.char2ID, _ = make_dict(char_counter, min_char_freq, add_pad_symbol=True)
+
+      if ignore_tag is not None:
+         tag_counter.pop(ignore_tag, None) # remove this special tag if present
+         self.tag2ID, self.ID2tag  = make_dict(tag_counter)
+         self.tag2ID[ignore_tag] = self.IGNORE_INDEX  # empty tags will not be trained
+      else:
+         self.tag2ID, self.ID2tag  = make_dict(tag_counter)
+
+      ### sizes of the symbol inventories
+      self.num_char_types = len(self.char2ID)
+      self.num_tag_types  = len(self.ID2tag)
+
+      self.word_embeddings, self.word_emb_size = read_word_embeddings(word_embeddings)
+      
+
+   def get_charIDs(self, word):
+      """ maps a word to a forward and a backward sequence of character IDs of length word_trunc_len """
+
+      unkID = self.char2ID[unk_string]
+      padID = self.char2ID[pad_string]
+
+      charIDs = [self.char2ID.get(c, unkID) for c in word]
+
+      # add enough padding symbols
+      fwd_charIDs = [padID] * self.word_trunc_len + charIDs
+      bwd_charIDs = [padID] * self.word_trunc_len + charIDs[::-1]
+
+      # truncate
+      fwd_charIDs = fwd_charIDs[-self.word_trunc_len:]
+      bwd_charIDs = bwd_charIDs[-self.word_trunc_len:]
+
+      return fwd_charIDs, bwd_charIDs
+
+
+   def words2charIDvec(self, words):
+      """ converts words to two arrays (forward and backward) of character IDs, one row per word """
+      ### convert words to character ID sequences
+      fwd_charID_seqs = []
+      bwd_charID_seqs = []
+      for word in words:
+         fwd_charIDs, bwd_charIDs = self.get_charIDs(word)
+         fwd_charID_seqs.append(fwd_charIDs)
+         bwd_charID_seqs.append(bwd_charIDs)
+
+      fwd_charID_seqs = numpy.asarray(fwd_charID_seqs, dtype='int32')
+      bwd_charID_seqs = numpy.asarray(bwd_charID_seqs, dtype='int32')
+
+      return fwd_charID_seqs, bwd_charID_seqs
+
+
+   def tags2IDs(self, tags):
+      """ takes a list of tags and converts them to IDs using the tag2ID dictionary """
+      # tags missing from the dictionary are mapped to the ID of the unknown symbol
+      unkID = self.tag2ID[unk_string]
+      IDs = [self.tag2ID.get(tag, unkID) for tag in tags]
+      return numpy.asarray(IDs, dtype='int32')
+
+
+   def save_parameters(self, filename):
+      """ save parameters to a file """
+      all_params = (self.word_trunc_len, self.char2ID, self.ID2tag)
+      with open(filename, "wb") as file:
+         pickle.dump(all_params, file)
+
+
+   ### functions needed during tagging ###############################################
+
+   def init_test(self, filename):
+      """ load parameters from a file """
+      # NOTE: unpickling can execute arbitrary code; only load trusted parameter files
+      with open(filename, "rb") as file:
+         self.word_trunc_len, self.char2ID, self.ID2tag = pickle.load(file)
+
+   def sentences(self, filename):
+      """ read data to be tagged. One token per line. Empty line follows a sentence """
+      with open(filename) as f:
+         words = []
+         for line in f:
+            line = line.rstrip()
+            if line != '':
+               words.append(line)
+            elif len(words) > 0:
+               # empty line indicates the end of a sentence
+               yield words
+               words = []
+         if len(words) > 0:
+            # last sentence of a file which does not end with an empty line
+            yield words
+   
+   def single_sentences(self, sentence):
+      """ yields the given tokenized sentence, so that it can be processed like the output of sentences() """
+      yield sentence
+
+   def IDs2tags(self, IDs):
+      """ takes a list of IDs and converts them to tags using the ID2tag dictionary """
+      return [self.ID2tag[int(ID)] for ID in IDs]
diff --git a/MHGTagger/RNNTagger.py b/MHGTagger/RNNTagger.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a10b66bf5e257cd45816c9c5ff3103ce2ea11d5
--- /dev/null
+++ b/MHGTagger/RNNTagger.py
@@ -0,0 +1,111 @@
+
+import sys
+import torch
+from torch import nn
+
+
+class WordRepresentation(nn.Module):
+   '''
+   Computes a representation of each word from its characters with a
+   forward and a backward LSTM
+   '''
+   def __init__(self, num_chars, emb_size, rec_size, dropout_rate):
+      super().__init__()
+
+      # embedding table shared by both character LSTMs
+      self.embeddings = nn.Embedding(num_chars, emb_size)
+
+      # one LSTM per reading direction
+      self.fwd_rnn = nn.LSTM(emb_size, rec_size)
+      self.bwd_rnn = nn.LSTM(emb_size, rec_size)
+
+      self.dropout = nn.Dropout(dropout_rate)
+
+
+   def forward(self, fwd_charIDs, bwd_charIDs):
+      final_states = []
+      directions = ((fwd_charIDs, self.fwd_rnn), (bwd_charIDs, self.bwd_rnn))
+      for charIDs, rnn in directions:
+         # swap the 2 dimensions so that the character positions come
+         # first, then look up the embeddings
+         embs = self.embeddings(charIDs.t())
+         # run the LSTM and keep its output at the last character position
+         outputs, _ = rnn(embs)
+         final_states.append(outputs[-1])
+
+      # a word is represented by the concatenated final outputs
+      return torch.cat(final_states, -1)
+
+
+class ResidualLSTM(nn.Module):
+   ''' Stack of bidirectional LSTMs with residual connections around all but the first '''
+
+   def __init__(self, input_size, rec_size, num_rnns, dropout_rate):
+      super().__init__()
+      self.rnn = nn.LSTM(input_size, rec_size, bidirectional=True, batch_first=True)
+
+      # the remaining num_rnns-1 layers read and write 2*rec_size features
+      upper_layers = []
+      for _ in range(num_rnns-1):
+         upper_layers.append(nn.LSTM(2*rec_size, rec_size, bidirectional=True, batch_first=True))
+      self.deep_rnns = nn.ModuleList(upper_layers)
+      self.dropout = nn.Dropout(dropout_rate)
+
+   def forward(self, state):
+      state = self.rnn(state)[0]
+      for layer in self.deep_rnns:
+         # dropout on the layer input only; the residual path is kept intact
+         state = state + layer(self.dropout(state))[0]
+      return state
+
+
+class RNNTagger(nn.Module):
+   ''' main tagger module: character-based word representations, word-level BiLSTM, tag scores '''
+
+   def __init__(self, num_chars, num_tags, char_emb_size, char_rec_size, 
+                word_rec_size, word_rnn_depth, dropout_rate, word_emb_size):
+
+      super().__init__()
+
+      # character LSTMs which turn each word into a vector of size 2*char_rec_size
+      self.word_representations = WordRepresentation(
+         num_chars, char_emb_size, char_rec_size, dropout_rate)
+      # deep BiLSTM over the word vectors of a sentence
+      self.word_rnn = ResidualLSTM(
+         2*char_rec_size, word_rec_size, word_rnn_depth, dropout_rate)
+      # linear layer which maps each BiLSTM state to one score per tag
+      self.output_layer = nn.Linear(2*word_rec_size, num_tags)
+
+      # dropout applied before and after the word-level BiLSTM
+      self.dropout = nn.Dropout(dropout_rate)
+
+      # optional layer which projects word representations to word embeddings
+      # (only needed for finetuning on word embeddings)
+      if word_emb_size > 0:
+         self.projection_layer = nn.Linear(2*char_rec_size, word_emb_size)
+
+
+   def forward(self, fwd_charIDs, bwd_charIDs, word_embedding_training=False):
+      ''' returns the tag scores, or projected word embeddings if word_embedding_training is set '''
+      # character-based representation of every word
+      word_reprs = self.word_representations(fwd_charIDs, bwd_charIDs)
+
+      if word_embedding_training:
+         # auxiliary task: project the word representations (without
+         # dropout) to word embedding vectors
+         projection = getattr(self, 'projection_layer', None)
+         if projection is None:
+            sys.exit("Error: The embedding projection layer is undefined!")
+         return projection(word_reprs)
+
+      # The words of the sentence form a single sequence, so a batch
+      # dimension of size 1 is added for the word-level BiLSTM and removed
+      # again afterwards. Dropout is applied to its input and its output.
+      sentence = self.dropout(word_reprs).unsqueeze(0)
+      reprs = self.dropout(self.word_rnn(sentence).squeeze(0))
+
+      # one score per tag for each word
+      scores = self.output_layer(reprs)
+
+      return scores
+      
diff --git a/MHGTagger/__pycache__/CRFTagger.cpython-310.pyc b/MHGTagger/__pycache__/CRFTagger.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..59c5064bbb75ae5ba1bb67a97026989e4817485d
Binary files /dev/null and b/MHGTagger/__pycache__/CRFTagger.cpython-310.pyc differ
diff --git a/MHGTagger/__pycache__/CRFTagger.cpython-37.pyc b/MHGTagger/__pycache__/CRFTagger.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d4def17fb7a1ed805b7bc9d9f430babe54410069
Binary files /dev/null and b/MHGTagger/__pycache__/CRFTagger.cpython-37.pyc differ
diff --git a/MHGTagger/__pycache__/CRFTagger.cpython-38.pyc b/MHGTagger/__pycache__/CRFTagger.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f8b1f5d2e1c5aaf40bd84cc63818da0d83f186b3
Binary files /dev/null and b/MHGTagger/__pycache__/CRFTagger.cpython-38.pyc differ
diff --git a/MHGTagger/__pycache__/Data.cpython-37.pyc b/MHGTagger/__pycache__/Data.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..60a78608bfb458782ab91d0c6b1dfd8da9b70daa
Binary files /dev/null and b/MHGTagger/__pycache__/Data.cpython-37.pyc differ
diff --git a/MHGTagger/__pycache__/Data.cpython-38.pyc b/MHGTagger/__pycache__/Data.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a508b228249e607f8e124e2cfcb681cb65cbaf3c
Binary files /dev/null and b/MHGTagger/__pycache__/Data.cpython-38.pyc differ
diff --git a/MHGTagger/__pycache__/NMT.cpython-310.pyc b/MHGTagger/__pycache__/NMT.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8ea23c210be29715e86ed5a4792caa8d204b0e94
Binary files /dev/null and b/MHGTagger/__pycache__/NMT.cpython-310.pyc differ
diff --git a/MHGTagger/__pycache__/NMTData.cpython-310.pyc b/MHGTagger/__pycache__/NMTData.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..95d9e53ddb77087e5d7d9eb71a17f09daf748592
Binary files /dev/null and b/MHGTagger/__pycache__/NMTData.cpython-310.pyc differ
diff --git a/MHGTagger/__pycache__/RNNData.cpython-310.pyc b/MHGTagger/__pycache__/RNNData.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ecfc30fa485f3b10778524c98fb46af8ec497416
Binary files /dev/null and b/MHGTagger/__pycache__/RNNData.cpython-310.pyc differ
diff --git a/MHGTagger/__pycache__/RNNData.cpython-37.pyc b/MHGTagger/__pycache__/RNNData.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b1753831519d35b4f67b321130e996b221e6c017
Binary files /dev/null and b/MHGTagger/__pycache__/RNNData.cpython-37.pyc differ
diff --git a/MHGTagger/__pycache__/RNNData.cpython-38.pyc b/MHGTagger/__pycache__/RNNData.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c4391bd788654ab131a86dc8a789576a92c6d7bc
Binary files /dev/null and b/MHGTagger/__pycache__/RNNData.cpython-38.pyc differ
diff --git a/MHGTagger/__pycache__/RNNTagger.cpython-310.pyc b/MHGTagger/__pycache__/RNNTagger.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..380d43089f35bd30141a22fd8c2c3e94da43f426
Binary files /dev/null and b/MHGTagger/__pycache__/RNNTagger.cpython-310.pyc differ
diff --git a/MHGTagger/__pycache__/RNNTagger.cpython-37.pyc b/MHGTagger/__pycache__/RNNTagger.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..36260cf254193ba40980a4dbcb21ff8b89863e14
Binary files /dev/null and b/MHGTagger/__pycache__/RNNTagger.cpython-37.pyc differ
diff --git a/MHGTagger/__pycache__/RNNTagger.cpython-38.pyc b/MHGTagger/__pycache__/RNNTagger.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c0fd8c4cb7f236f1e8fb4eee812ca3eb7638061a
Binary files /dev/null and b/MHGTagger/__pycache__/RNNTagger.cpython-38.pyc differ
diff --git a/MHGTagger/__pycache__/rnn_annotate.cpython-38.pyc b/MHGTagger/__pycache__/rnn_annotate.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8b42b6b2875d55595d82f178ba44e480a18a8582
Binary files /dev/null and b/MHGTagger/__pycache__/rnn_annotate.cpython-38.pyc differ
diff --git a/MHGTagger/rnn_annotate.py b/MHGTagger/rnn_annotate.py
new file mode 100755
index 0000000000000000000000000000000000000000..fef2524d40d8645588fefbd78fe572c1c8e45d16
--- /dev/null
+++ b/MHGTagger/rnn_annotate.py
@@ -0,0 +1,145 @@
+#!/usr/bin/python3
+
+import sys
+import pickle
+import torch
+from huggingface_hub import hf_hub_download
+
+from .Data import Data
+from .RNNTagger import RNNTagger
+from .CRFTagger import CRFTagger
+
+
+###########################################################################
+# main function
+###########################################################################
+
+class Args:
+   """ plain container which bundles the options of annotate() """
+   def __init__(self, path_param, model_id, path_data, crf_beam_size, gpu, min_prob, print_probs) -> None:
+      # parameter file prefix, model repository and input data file
+      self.path_param, self.model_id, self.path_data = path_param, model_id, path_data
+      # processing options
+      self.crf_beam_size, self.gpu = crf_beam_size, gpu
+      # output options
+      self.min_prob, self.print_probs = min_prob, print_probs
+
+# if __name__ == "__main__":
+def annotate(tokens, path_param='MHGTagger/tagger', model_id='nielklug/rnn_tagger', path_data='', crf_beam_size=10, gpu=-1, min_prob=-1.0, print_probs=True):
+   """
+   Tag one tokenized sentence (a sequence of word strings).
+
+   path_param is the prefix of the files <prefix>.io (symbol tables) and
+   <prefix>.hyper (hyper-parameters); model_id is the Hugging Face repository
+   containing 'tagger.rnn'; gpu is a GPU index (CPU=-1). With min_prob != -1.0,
+   all tags whose probability exceeds min_prob times the probability of the best
+   tag are printed to stdout. path_data and crf_beam_size are not used here.
+
+   Returns (words, tags, probs). probs is only filled for an RNNTagger model
+   with print_probs set; the lists are empty in the min_prob != -1.0 case.
+   """
+   if len(tokens) == 0:
+      return ([], [], [])  # nothing to tag; do not run the model on an empty sentence
+
+   args = Args(path_param, model_id, path_data, crf_beam_size, gpu, min_prob, print_probs)
+
+   # Select the processing device
+   if args.gpu >= 0:
+      if not torch.cuda.is_available():
+         print('No gpu available. Using cpu instead.', file=sys.stderr)
+         args.gpu = -1
+      else:
+         if args.gpu >= torch.cuda.device_count():
+            print('gpu '+str(args.gpu)+' not available. Using gpu 0 instead.', file=sys.stderr)
+            args.gpu = 0
+         torch.cuda.set_device(args.gpu)
+   device = torch.device('cuda' if args.gpu >= 0 else 'cpu')
+
+   # load parameters
+   data  = Data(args.path_param+'.io')   # read the symbol mapping tables
+
+   # NOTE: unpickling can execute arbitrary code; only load trusted parameter files
+   with open(args.path_param+'.hyper', 'rb') as file:
+      hyper_params = pickle.load(file)
+   # CRFTagger takes 9 constructor arguments: those of RNNTagger plus beam_size
+   model = CRFTagger(*hyper_params) if len(hyper_params)==9 \
+           else RNNTagger(*hyper_params)
+
+   model_file = hf_hub_download(repo_id=args.model_id, filename='tagger.rnn')
+   model.load_state_dict(torch.load(model_file, 
+                         map_location=torch.device('cpu')))
+
+   model = model.to(device)
+
+   if type(model) is CRFTagger:
+      # warn about the options which are not switched off (-1.0 disables min_prob)
+      for optvar, option in zip((args.min_prob != -1.0, args.print_probs),
+                                ("min_prob","print_probs")):
+         if optvar:
+            print(f"Warning: Option --{option} is ignored because the model has a CRF output layer", file=sys.stderr)
+
+   model.eval()
+   with torch.no_grad():
+      for words in data.single_sentences(tokens):
+         # map words to numbers and create Torch variables
+         fwd_charIDs, bwd_charIDs = data.words2charIDvec(words)
+         fwd_charIDs = torch.LongTensor(fwd_charIDs).to(device)
+         bwd_charIDs = torch.LongTensor(bwd_charIDs).to(device)
+
+         words_all = []
+         tagged = []
+         probs_all = []
+         # run the model
+         if type(model) is RNNTagger:
+            tagscores = model(fwd_charIDs, bwd_charIDs)
+            if args.min_prob == -1.0:
+               # only return the word and tag with the highest score
+               tagIDs = tagscores.argmax(-1)
+               tags = data.IDs2tags(tagIDs.to("cpu"))
+               if not args.print_probs:
+                  for word, tag in zip(words, tags):
+                     words_all.append(word)
+                     tagged.append(tag)
+               else:
+                  # return the probabilities as well
+                  tagprobs = torch.nn.functional.softmax(tagscores, dim=-1)
+                  # get the probabilities of the highest-scoring tags
+                  probs = tagprobs[range(len(tagIDs)), tagIDs].to("cpu").tolist()
+                  # collect the result
+                  for word, tag, prob in zip(words, tags, probs):
+                     words_all.append(word)
+                     tagged.append(tag)
+                     probs_all.append(round(float(prob), 4))
+            else:
+               # print the best tags for each word
+               tagprobs = torch.nn.functional.softmax(tagscores, dim=-1)
+               # get the most probable tag and its probability
+               best_probs, _ = tagprobs.max(-1)
+               # get all tags with a probability above best_prob * min_prob
+               thresholds = best_probs * args.min_prob
+               greaterflags = (tagprobs > thresholds.unsqueeze(1))
+               for word, flags, probs in zip(words, greaterflags, tagprobs):
+                  # get the IDs of the best tags
+                  IDs = flags.nonzero()
+                  # get the best tags and their probabilities
+                  best_probs = probs[IDs].to("cpu")
+                  best_tags = data.IDs2tags(IDs.to("cpu"))
+                  # sort the tags by decreasing probability
+                  sorted_list = sorted(zip(best_tags, best_probs), key=lambda x:-x[1])
+                  best_tags, best_probs = zip(*sorted_list)
+                  # generate the output
+                  if args.print_probs:
+                     # append the probabilities to the tags
+                     best_tags = [f"{t} {float(p):.4f}" for t, p in zip(best_tags, best_probs)]
+                  print(word, ' '.join(best_tags), sep="\t")
+         elif type(model) is CRFTagger:
+            tagIDs = model(fwd_charIDs, bwd_charIDs)
+            tags = data.IDs2tags(tagIDs)
+            for word, tag in zip(words, tags):
+               print(word, tag, sep='\t')
+               words_all.append(word)  # also return the result like the RNNTagger branch
+               tagged.append(tag)
+         else:
+            sys.exit('Error')
+
+         return (words_all, tagged, probs_all)  # single_sentences yields exactly one sentence
diff --git a/MHGTagger/tagger.hyper b/MHGTagger/tagger.hyper
new file mode 100644
index 0000000000000000000000000000000000000000..179a6843288c6dc3c92e8eca4c77054cc8205b9d
Binary files /dev/null and b/MHGTagger/tagger.hyper differ
diff --git a/MHGTagger/tagger.io b/MHGTagger/tagger.io
new file mode 100644
index 0000000000000000000000000000000000000000..31328c5b669a578beb8edea6acc5e14414821e13
Binary files /dev/null and b/MHGTagger/tagger.io differ
diff --git a/README.md b/README.md
index 00c3ed890c9c38afd3c49b74628b3dcb959d7581..60755f8ce689be136f00e6a87407db063a79ebd1 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 ---
-title: Mhg Parsing
+title: MHG Parsing
 emoji: 🌍
 colorFrom: gray
 colorTo: red
diff --git a/Tagset_Mappings/POS-mapping.txt b/Tagset_Mappings/POS-mapping.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a81e4816e7a097818e64bf444d38ee30fe5c8378
--- /dev/null
+++ b/Tagset_Mappings/POS-mapping.txt
@@ -0,0 +1,73 @@
+$_	$_
+ADJA	ADJA
+ADJD	ADJD
+ADJN	ADJA.Pos
+ADJS	ADJA
+APPR	APPR
+APPRART	APPRART
+AVD	ADV
+AVD-KO*	ADV
+AVG	PWAV
+AVW	PWAV
+CARDA	CARD
+CARDD	CARD
+CARDN	CARD
+CARDS	CARD
+DDA	PDAT
+DDART	ART
+DDD	PDAT
+DDN	PDAT
+DDS	PDS
+DGA	PWAT
+DGS	PWS
+DIA	PIAT
+DIART	ART
+DID	PDAT
+DIN	PDAT
+DIS	PIS
+DPOSA	PPOSAT
+DPOSD	PPOSS
+DPOSN	PPOSAT
+DPOSS	NN
+DRELS	PRELS
+DWA	PWAT
+DWD	PWS
+DWS	PWS
+FM	FM
+ITJ	ITJ
+KO*	KOUS
+KOKOM	KOKOM
+KON	KON
+KOUS	KOUS
+NA	NN
+NE	NE
+PART	PART
+PAVAP	PROAV
+PAVD	PROAV
+PAVG	PROAV
+PAVW	PWAV
+PG	PWS
+PI	PIS
+PPER	PPER
+PRF	PRF
+PTK	ADV
+PTKA	PTKA
+PTKANT	PTKANT
+PTKNEG	PTKNEG
+PTKVZ	PTKVZ
+PW	PWS
+VAFIN	VAFIN
+VAIMP	VAIMP
+VAINF	VAINF
+VAPP	VAPP
+VAPS	ADJD.Pos
+VMFIN	VMFIN
+VMIMP	VMIMP
+VMINF	VMINF
+VMPP	VMPP
+VMPS	ADJD.Pos
+VVFIN	VVFIN
+VVIMP	VVIMP
+VVINF	VVINF
+VVPP	VVPP
+VVPS	ADJD.Pos
diff --git a/Tagset_Mappings/__pycache__/tag_mapping.cpython-38.pyc b/Tagset_Mappings/__pycache__/tag_mapping.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..84339b214ec8cbcc1e8c38f2a510dd11af1c4e9a
Binary files /dev/null and b/Tagset_Mappings/__pycache__/tag_mapping.cpython-38.pyc differ
diff --git a/Tagset_Mappings/feature-mapping.txt b/Tagset_Mappings/feature-mapping.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0ddd5a04e6001aa9053dcc66ea55bc35b0581ecb
--- /dev/null
+++ b/Tagset_Mappings/feature-mapping.txt
@@ -0,0 +1,11 @@
+Masc,Fem	*
+Fem,Masc	*
+Masc,Neut	*
+Neut,Masc	*
+Fem,Neut	*
+Neut,Fem	*
+Abl	Dat
+Instr	Dat
+Akk	Acc
+Voc	Nom
+bSg	Sg
diff --git a/Tagset_Mappings/tag_mapping.py b/Tagset_Mappings/tag_mapping.py
new file mode 100755
index 0000000000000000000000000000000000000000..9f340164413b6b5ae69a00286ff17d5924579705
--- /dev/null
+++ b/Tagset_Mappings/tag_mapping.py
@@ -0,0 +1,129 @@
+#!/usr/bin/python3
+
+"""
+cd schmid/MHG-Parser/Tagset-Mappings
+python tag-mapping.py ../self-attentive-parser-master/data/mhg/MHG.tagged > ../self-attentive-parser-master/data/mhg/MHG_new.mapped
+"""
+
+import sys
+import fileinput
+
+with open("Tagset_Mappings/POS-mapping.txt") as file:  # NOTE: relative path, resolved against the working directory
+    pos_map = dict(line.split() for line in file if line.strip())  # first column -> second column
+# same two-column format; maps feature names to their replacements
+with open("Tagset_Mappings/feature-mapping.txt") as file:
+    feature_map = dict(line.split() for line in file if line.strip())
+
+def map_tags(tags):
+    return [map_tag(tag) for tag in tags]  # apply map_tag to each tag, keeping the order
+        
+
+def map_tag(tag):
+    """
+    Map a single tag of the form POS.feature1.feature2... to the target tagset.
+
+    The POS part (only the first alternative of "A|B") is looked up in pos_map,
+    which may itself contribute features ("ADJA.Pos"); each feature is replaced
+    via feature_map; finally the features are reordered, padded with '*' or
+    dropped depending on the target POS tag and the number of features.
+    Raises KeyError if the POS part is missing from pos_map.
+    """
+    # str.replace returns a new string, so the result has to be assigned
+    tag = tag.replace('AVD.Comp', 'AVD').replace('AVD.Sup', 'AVD')
+    pos, *features = tag.split(".")
+    # keep only the first alternative of an ambiguous POS part ("A|B")
+    pos = pos.split('|')[0]
+    pos = pos_map[pos]
+    # the mapped POS tag may carry features of its own (e.g. "ADJA.Pos")
+    pos, *features2 = pos.split(".")
+    features = features2 + features
+    features = [feature_map.get(f, f) for f in features]
+    # rearrange the features depending on the target POS tag and the number
+    # of features; '*' fills the positions without a value
+    if pos == 'ADJA':
+        if len(features) == 5:
+            features = [features[0], features[2], features[3], features[1]]
+        elif len(features) in [3,4]:
+            features = [features[0], features[2], '*', features[1]]
+        elif len(features) == 2:
+            features = [features[0], '*', '*', features[1]]
+        elif len(features) == 1:
+            features = [features[0], '*', '*', '*']
+    elif pos in ['ADV', 'CARD']:
+        features = []
+    elif pos in ['ART', 'APPRART']:
+        if len(features) == 4:
+            features = [features[1], features[2], features[0]]
+        elif len(features) in [0, 1]:
+            features = ['*', '*', '*']
+    elif pos in ['NN', 'PDAT', 'PIS', 'PWS']:
+        # these four tags were handled by identical, separate branches
+        if len(features) == 4:
+            features = [features[1], features[2], features[0]]
+        elif len(features) == 0:
+            features = ['*', '*', '*']
+    elif pos == 'NE':
+        if len(features) == 2:
+            features.append('*')
+        elif len(features) == 1:
+            features.extend(['*', '*'])
+    elif pos == 'PIAT':
+        if len(features) == 4:
+            features = [features[1], features[2], features[0]]
+        elif len(features) == 2:
+            features = [features[1], '*', features[0]]
+        elif len(features) == 0:
+            features = ['*', '*', '*']
+    elif pos == 'PPOSAT':
+        if len(features) in [3, 4]:
+            features = [features[1], features[2], features[0]]
+        elif len(features) == 0:
+            features = ['*', '*', '*']
+    elif pos == 'PWAT' and len(features) == 4:
+        features = [features[1], features[2], features[0]]
+    elif pos == 'PPOSS':
+        features = ['*.*.*']
+    elif pos == 'PDS':
+        if len(features) == 4:
+            features = [features[1], features[2], features[0]]
+        elif len(features) == 1:
+            features.extend(['*', '*'])
+        elif len(features) == 2:
+            features = [features[1], '*', '*']
+    elif pos == 'PRELS' and len(features) == 3:
+        features = [features[1], features[2], features[0]]
+    elif pos == 'PPER' and len(features) == 4:
+        features = [features[3], features[1], features[2], features[0]]
+    elif pos == 'PRF' and len(features) == 3:
+        features = ['*', features[0], features[1]]
+    elif pos in ['VAFIN','VMFIN','VVFIN'] and len(features) == 4:
+        features = [features[3], features[2], features[1], features[0]]
+    elif pos in ['VAIMP','VMIMP','VVIMP'] and len(features) == 2:
+        features = [features[1], features[0], 'Imp']
+    elif pos in ['VAINF','VMINF','VVINF'] and len(features) == 0:
+        features = ['Inf']
+    elif pos in ['VAPP','VMPP','VVPP'] and len(features) == 0:
+        features = ['Psp']
+    return '.'.join([pos]+features)
+    
+# for i, line in enumerate(fileinput.input()):
+#     print(i, end="\r", file=sys.stderr)
+#     line = line.strip()
+#     if line:
+#         word, tag, *_ = line.split("\t")
+#         tag = tag.replace('APPR|DDART', 'APPRART')
+#         for t in tag.split("|"):
+#             print(word, map_tag(t), sep="\t")
+#     else:
+#         print()
+
+
+# for i, line in enumerate(fileinput.input()):
+#     print(i, end="\r", file=sys.stderr)
+#     line = line.strip()
+#     if line:
+#         word, tag, *_ = line.split("\t")
+#         tag = tag.replace('APPR|DDART', 'APPRART')
+#         print(word, map_tag(tag.split('|')[0]), sep="\t")
+#     else:
+#         print()
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ef49d07b75ffa55f030f98ccd2b1271550dac34
--- /dev/null
+++ b/app.py
@@ -0,0 +1,47 @@
+import streamlit as st
+from parse import parse_text
+import nltk
+from nltk import Tree
+import pandas as pd
+import re
+from nltk.tree.prettyprinter import TreePrettyPrinter
+
+
+st.title("MHG parsing system (demo)")
+text = st.text_area("""This is a simple demo of a Middle High German (MHG) parsing system using delexicalization method.\n\n
+                    Enter some MHG text below!""")
+
+st.text("""Example MHG sentences:
+1. Swer an rehte güete wendet sîn gemüete, dem volget sælde und êre, des gît gewisse 
+lêre künec Artûs der guote, der mit rîters muote nâch lobe kunde strîten.
+2. Uns ist in alten mæren wunders vil geseitvon helden lobebæren, von grôzer arebeit,
+von freuden, hôchgezîten, von weinen und von klagen, von küener recken strîten muget 
+ir nu wunder hœren sagen.""")
+
+nltk.download('punkt')
+
+
+if text:
+  tokens, tags, probs, parse_tree = parse_text(text)
+  
+  # create a table to show the tagged results:
+  zipped = list(zip(tokens, tags, probs))
+  
+  df = pd.DataFrame(zipped, columns=['Token', 'Tag', 'Prob.'])
+  
+  # Convert the bracket parse tree into an NLTK Tree
+  t = Tree.fromstring(re.sub(r'(\.[^ )]+)+', '', parse_tree))
+  
+  tree_svg = TreePrettyPrinter(t).svg(nodecolor='black', leafcolor='black', funccolor='black')
+  
+  col1 = st.columns(1)[0]
+  col1.header("POS tagging result:")
+  col1.table(df)
+  
+  col2 = st.columns(1)[0]
+  col2.header("Parsing result:")
+  col2.write(parse_tree.replace('_', '\_').replace('$', '\$').replace('*', '\*'))
+
+# Display the graph in the Streamlit app
+  col2.image(tree_svg, use_column_width=True)
+    
diff --git a/parse.py b/parse.py
new file mode 100644
index 0000000000000000000000000000000000000000..9213ea943b4759e69edf7787580a986f2ecb1095
--- /dev/null
+++ b/parse.py
@@ -0,0 +1,19 @@
+import re
+from MHGTagger.rnn_annotate import annotate
+from Tagset_Mappings.tag_mapping import map_tags
+from parsing.src.parse import run_parse 
+from nltk import word_tokenize
+
+def parse_text(text):
+    tokens = tokenize(text)
+    tokens, tags, probs = annotate(tokens)
+    tags = map_tags(tags)
+    parse_tree = run_parse(tokens, tags)[0]
+    return tokens, tags, probs, parse_tree
+
+def tokenize(text: str):
+    text = re.sub(r'\s*([.,;:?!"])\s', r' \1 ', text)
+    text = re.sub(r'\s*([.,;:?!"]) ', r' \1 ', text)
+    tokens = word_tokenize(text)
+    return tokens
+    
\ No newline at end of file
diff --git a/parsing/EVALB/COLLINS.prm b/parsing/EVALB/COLLINS.prm
new file mode 100644
index 0000000000000000000000000000000000000000..cb1a2ff04b1f378145519cd121745db9fc14c645
--- /dev/null
+++ b/parsing/EVALB/COLLINS.prm
@@ -0,0 +1,66 @@
+##------------------------------------------##
+## Debug mode                               ##
+##   0: No debugging                        ##
+##   1: print data for individual sentence  ##
+##------------------------------------------##
+DEBUG 0
+
+##------------------------------------------##
+## MAX error                                ##
+##    Number of error to stop the process.  ##
+##    This is useful if there could be      ##
+##    tokenization error.                   ##
+##    The process will stop when this number##
+##    of errors are accumulated.            ##
+##------------------------------------------##
+MAX_ERROR 10
+
+##------------------------------------------##
+## Cut-off length for statistics            ##
+##    At the end of evaluation, the         ##
+##    statistics for the sentences of length##
+##    less than or equal to this number will##
+##    be shown, on top of the statistics    ##
+##    for all the sentences                 ##
+##------------------------------------------##
+CUTOFF_LEN 40
+
+##------------------------------------------##
+## unlabeled or labeled bracketing          ##
+##    0: unlabeled bracketing               ##
+##    1: labeled bracketing                 ##
+##------------------------------------------##
+LABELED 1                 
+
+##------------------------------------------##
+## Delete labels                            ##
+##    list of labels to be ignored.         ##
+##    If it is a pre-terminal label, delete ##
+##    the word along with the brackets.     ##
+##    If it is a non-terminal label, just   ##
+##    delete the brackets (don't delete     ##
+##    children).                            ##
+##------------------------------------------##
+DELETE_LABEL TOP
+DELETE_LABEL -NONE-
+DELETE_LABEL ,
+DELETE_LABEL :
+DELETE_LABEL ``
+DELETE_LABEL ''
+DELETE_LABEL .
+
+##------------------------------------------##
+## Delete labels for length calculation     ##
+##    list of labels to be ignored for      ##
+##    length calculation purpose            ##
+##------------------------------------------##
+DELETE_LABEL_FOR_LENGTH -NONE-
+
+##------------------------------------------##
+## Equivalent labels, words                 ##
+##     the pairs are considered equivalent  ##
+##     This is non-directional.             ##
+##------------------------------------------##
+EQ_LABEL ADVP PRT
+
+# EQ_WORD  Example example
diff --git a/parsing/EVALB/LICENSE b/parsing/EVALB/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..68a49daad8ff7e35068f2b7a97d643aab440eaec
--- /dev/null
+++ b/parsing/EVALB/LICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/parsing/EVALB/Makefile b/parsing/EVALB/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..0fe4ada5512816f87c8769547ce9992a857636a0
--- /dev/null
+++ b/parsing/EVALB/Makefile
@@ -0,0 +1,4 @@
+all: evalb
+
+evalb: evalb.c
+	gcc -Wall -g -o evalb evalb.c
diff --git a/parsing/EVALB/README b/parsing/EVALB/README
new file mode 100644
index 0000000000000000000000000000000000000000..106e927eaf73e1d9aebf5c7dd7e8f4f47748bdf1
--- /dev/null
+++ b/parsing/EVALB/README
@@ -0,0 +1,300 @@
+#################################################################
+#                                                               #
+#      Bug fix and additional functionality for evalb           #
+#                                                               #
+# This updated version of evalb fixes a bug in which sentences  #
+# were incorrectly categorized as "length mismatch" when the    #
+# parse output had certain mislabeled parts-of-speech.          #
+#                                                               #
+# The bug was the result of evalb treating one of the tags (in  #
+# gold or test) as a label to be deleted (see sections [6],[7]  #
+# for details), but not the corresponding tag in the other.     #
+# This most often occurs with punctuation. See the subdir       #
+# "bug" for an example gld and tst file demonstrating the bug,  #
+# as well as output of evalb with and without the bug fix.      #
+#                                                               #
+# For the present version in case of length mismatch, the nodes #
+# causing the imbalance are reinserted to resolve the miscount. #
+# If the lengths of gold and test truly differ, the error is    #
+# still reported. The parameter file "new.prm" (derived from    #
+# COLLINS.prm) shows how to add new potential mislabelings for  #
+# quotes (",``,',`).                                            #
+#                                                               #
+# I have preserved DJB's revision for modern compilers except   #
+# for the declaration of "exit" which is provided by stdlib.    #
+#                                                               #
+# Other changes:                                                #
+#                                                               #
+# * output of F-Measure in addition to precision and recall     #
+#   (I did not update the documentation in section [4] for it)  #
+#                                                               #
+# * more comprehensive DEBUG output that includes bracketing    #
+#   information as evalb is processing each sentence            #
+#   (useful in working through this, and perhaps other bugs).   #
+#   Use either the "-D" run-time switch or set DEBUG to 2 in    #
+#   the parameter file.                                         #
+#                                                               #
+# * added DELETE_LABEL lines in new.prm for S1 nodes produced   #
+#   by the Charniak parser and "?", "!" punctuation produced by #
+#   the Bikel parser.                                           #
+#                                                               #
+#                                                               #
+#                                           David Ellis (Brown) #
+#                                                               #
+#                                           January.2006        #
+#################################################################
+
+#################################################################
+#                                                               #
+#      Update of evalb for modern compilers                     #
+#                                                               #
+# This is an updated version of evalb, for use with modern C    #
+# compilers. There are a few updates, each marked in the code:  #
+#                                                               #
+# /* DJB: explanation of comment */                             #
+#                                                               #
+# The updates are purely to help compilation with recent        #
+# versions of GCC (and other C compilers). There are *NO* other #
+# changes to the algorithm itself.                              #
+#                                                               #
+# I have made these changes following recommendations from      #
+# users of the Corpora Mailing List, especially Peet Morris and #
+# Ramon Ziai.                                                   #
+#                                                               #
+#                                     David Brooks (Birmingham) #
+#                                                               #
+#                                     September.2005            #
+#################################################################
+
+#################################################################
+#                                                               #
+#      README file for evalb                                    #
+#                                                               #
+#                                         Satoshi Sekine (NYU)  #
+#                                         Mike Collins (UPenn)  #
+#                                                               #
+#                                         October.1997          #
+#################################################################
+
+Contents of this README:
+
+   [0] COPYRIGHT
+   [1] INTRODUCTION
+   [2] INSTALLATION AND RUN
+   [3] OPTIONS
+   [4] OUTPUT FORMAT FROM THE SCORER
+   [5] HOW TO CREATE A GOLDFILE FROM THE TREEBANK
+   [6] THE PARAMETER FILE
+   [7] MORE DETAILS ABOUT THE SCORING ALGORITHM
+
+
+[0] COPYRIGHT
+
+The authors abandon the copyright of this program. Everyone is 
+permitted to copy and distribute the program or a portion of the program
+with no charge and no restrictions unless it is harmful to someone.
+
+However, the authors would be delighted if users use the program properly
+and let the authors know of any bugs or problems.
+
+This software is provided "AS IS", and the authors make no warranties,
+express or implied.
+
+To legally enforce the abandonment of copyright, this package is released
+under the Unlicense (see LICENSE).
+
+[1] INTRODUCTION
+
+Evaluation of bracketing looks simple, but in fact, there are minor
+differences from system to system. This is a program to parameterize
+such minor differences and to give an informative result.
+
+"evalb" evaluates bracketing accuracy in a test-file against a gold-file.
+It returns recall, precision, tagging accuracy. It uses an identical 
+algorithm to that used in (Collins ACL97).
+
+
+[2] Installation and Run
+
+To compile the scorer, type 
+
+> make
+
+
+To run the scorer:
+
+> evalb -p Parameter_file Gold_file Test_file
+
+ 
+For example to use the sample files:
+
+> evalb -p sample.prm sample.gld sample.tst
+
+
+
+[3] OPTIONS
+
+You can specify system parameters in the command line options.
+Other options concerning the evaluation metrics should be specified
+in the parameter file, described later.
+
+        -p param_file  parameter file                        
+        -d             debug mode                            
+        -e n           number of error to kill (default=10)  
+        -h             help                                  
+
+
+
+[4] OUTPUT FORMAT FROM THE SCORER
+
+The scorer gives individual scores for each sentence, for
+example:
+
+  Sent.                        Matched  Bracket   Cross        Correct Tag
+ ID  Len.  Stat. Recal  Prec.  Bracket gold test Bracket Words  Tags Accracy
+============================================================================
+   1    8    0  100.00 100.00     5      5    5      0      6     5    83.33
+
+At the end of the output the === Summary === section gives statistics 
+for all sentences, and for sentences <=40 words in length. The summary
+contains the following information:
+
+i)   Number of sentences -- total number of sentences.
+
+ii)  Number of Error/Skip sentences -- should both be 0 if there is no
+    problem with the parsed/gold files.
+
+iii) Number of valid sentences = Number of sentences - Number of Error/Skip
+    sentences 
+
+iv)  Bracketing recall =     (number of correct constituents)
+                         ----------------------------------------
+                         (number of constituents in the goldfile)
+
+v)   Bracketing precision = (number of correct constituents)
+                         ----------------------------------------
+                         (number of constituents in the parsed file)
+
+vi)  Complete match = percentage of sentences where recall and precision are
+    both 100%. 
+
+vii) Average crossing = (number of constituents crossing a goldfile constituent)
+                         ----------------------------------------------------
+                                        (number of sentences)
+
+viii) No crossing = percentage of sentences which have 0 crossing brackets.
+
+ix)   2 or less crossing = percentage of sentences which have <=2 crossing brackets.
+
+x)    Tagging accuracy = percentage of correct POS tags (but see [7].3 for exact
+     details of what is counted).
+
+
+
+[5] HOW TO CREATE A GOLDFILE FROM THE PENN TREEBANK
+
+
+The gold and parsed files are in a format similar to this:
+
+(TOP (S (INTJ (RB No)) (, ,) (NP (PRP it)) (VP (VBD was) (RB n't) (NP (NNP Black) (NNP Monday))) (. .)))
+
+To create a gold file from the treebank:
+
+tgrep -wn '/.*/' | tgrep_process.prl 
+
+will produce a goldfile in the required format.  ("tgrep -wn '/.*/'" prints
+parse trees, "tgrep_process.prl" just skips blank lines).
+
+For example, to produce a goldfile for section 23 of the treebank:
+
+tgrep -wn '/.*/' | tail +90895 | tgrep_process.prl | sed 2416q > sec23.gold
+
+
+
+[6] THE PARAMETER (.prm) FILE
+
+
+The .prm file sets options regarding the scoring method. COLLINS.prm gives
+the same scoring behaviour as the scorer used in (Collins 97). The options 
+chosen were: 
+
+1) LABELED 1
+
+to give labelled precision/recall figures, i.e. a constituent must have the
+same span *and* label as a constituent in the goldfile.
+
+2) DELETE_LABEL TOP   
+
+Don't count the "TOP" label (which is always given in the output of tgrep) 
+when scoring. 
+
+3) DELETE_LABEL -NONE-  
+
+Remove traces (and all constituents which dominate nothing but traces) when
+scoring. For example
+
+.... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
+
+would be processed to give
+
+.... (VP (VBD reported)) (. .)))
+
+
+4)
+DELETE_LABEL ,     -- for the purposes of scoring remove punctuation
+DELETE_LABEL :
+DELETE_LABEL ``
+DELETE_LABEL ''
+DELETE_LABEL .
+
+5) DELETE_LABEL_FOR_LENGTH -NONE-   -- don't include traces when calculating
+                                       the length of a sentence (important
+                                       when classifying a sentence as <=40
+                                       words or >40 words)
+
+6) EQ_LABEL ADVP PRT
+
+Count ADVP and PRT as being the same label when scoring.
+
+
+
+
+[7] MORE DETAILS ABOUT THE SCORING ALGORITHM
+
+
+1) The scorer initially processes the files to remove all nodes specified
+by DELETE_LABEL in the .prm file. It also recursively removes nodes which
+dominate nothing due to all their children being removed. For example, if
+-NONE- is specified as a label to be deleted, 
+
+.... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
+
+would be processed to give
+
+.... (VP (VBD reported)) (. .)))
+
+2) The scorer also removes all functional tags attached to non-terminals
+(functional tags are prefixed with "-" or "=" in the treebank). For example
+"NP-SBJ" is processed to give "NP", "NP=2" is changed to "NP".
+
+
+3) Tagging accuracy counts tags for all words *except* any tags which are
+deleted by a DELETE_LABEL specification in the .prm file. (For example, for
+COLLINS.prm, punctuation tagged as "," ":" etc. would not be included).
+
+4) When calculating the length of a sentence, all words with POS tags not 
+included in the "DELETE_LABEL_FOR_LENGTH" list in the .prm file are
+counted. (For COLLINS.prm, only "-NONE-" is specified in this list, so
+traces are removed before calculating the length of the sentence).
+
+5) There are some subtleties in scoring when either the goldfile or parsed
+file contains multiple constituents for the same span which have the same
+non-terminal label. e.g. (NP (NP the man)) If the goldfile contains n 
+constituents for the same span, and the parsed file contains m constituents
+with that nonterminal, the scorer works as follows:
+
+i) If m>n, then the precision is n/m, recall is 100%
+
+ii) If n>m, then the precision is 100%, recall is m/n.
+
+iii) If n==m, recall and precision are both 100%.
diff --git a/parsing/EVALB/bug/bug.gld b/parsing/EVALB/bug/bug.gld
new file mode 100644
index 0000000000000000000000000000000000000000..288a25400f7939d2de2379c46f69ec1e91f0df04
--- /dev/null
+++ b/parsing/EVALB/bug/bug.gld
@@ -0,0 +1,5 @@
+(TOP (S (NP-SBJ (DT The)  (NN Thy-1)  (NN gene)  (NN promoter) ) (VP (VBZ resembles)  (NP (DT a)  (`` ")  (JJ housekeeping)  ('' ")  (NN promoter) ) (PP (IN in)  (SBAR (IN that)  (S (NP-SBJ-68 (PRP it) ) (VP-COOD (VP (VBZ is)  (ADJP-PRD (JJ located)  (PP (IN within)  (NP (DT a)  (JJ methylation-free)  (NN island) )))) (, ,)  (VP (VBZ lacks)  (NP (DT a)  (JJ canonical)  (NN TATA)  (NN box) )) (, ,)  (CC and)  (VP (VBZ displays)  (NP (NN heterogeneity) ) (PP (IN in)  (NP (NP (DT the)  (JJ 5'-end)  (NNS termini) ) (PP (IN of)  (NP (DT the)  (NN mRNA) )))))))))) (. .) ) )
+(TOP (S (NP-SBJ (DT The)  (JJ latter)  (`` ")  (NP (NP (JJ nuclear)  (NN factor) ) (PP (IN for)  (NP (VBN activated)  (NN T)  (NNS cells) ))) ('' ") ) (ADVP (RB likely) ) (VP (VBZ contributes)  (PP (TO to)  (NP (NP (DT the)  (NN tissue)  (NN specificity) ) (PP (IN of)  (NP (NN IL-2)  (NN gene)  (NN expression) ))))) (. .) ) )
+(TOP (S (ADVP (RB Thus) ) (, ,)  (NP-SBJ (PRP we) ) (VP (VBD postulated)  (SBAR-COOD (SBAR (IN that)  (S (NP-SBJ (NP (DT the)  (JJ circadian)  (NN modification) ) (PP (IN of)  (NP (NN GR) ))) (VP (VBD was)  (ADJP-PRD (JJ independent)  (PP (IN of)  (NP-COOD (NP (NP (DT the)  (JJ diurnal)  (NNS fluctuations) ) (PP (IN in)  (NP (NN plasma)  (NN cortisol)  (NN level) ))) (CC or)  (NP (NP (DT the)  (JJ circadian)  (NNS variations) ) (PP (IN in)  (NP (JJ environmental)  (NN lighting) ))))))))) (CC and)  (SBAR (IN that)  (S (NP-SBJ-79 (DT the)  (NN rhythmicity) ) (VP (MD might)  (VP (VB be)  (VP (VBN regulated)  (NP (-NONE- *-79) ) (PP (IN by)  (NP-LGS (NP (DT the)  (`` ')  (JJ circadian)  (NN pacemaker)  ('' ') ) (ADJP (JJ located)  (PP (IN in)  (NP (DT the)  (JJ human)  (JJ basal)  (NN brain) )))))))))))) (. .) ) )
+(TOP (S (NP-SBJ-70 (JJ Such)  (NN transcription)  (NNS factors) ) (VP (VBP play)  (NP (DT a)  (JJ key)  (NN role) ) (PP (IN in)  (NP (NP (DT the)  (NN development) ) (PP (IN of)  (NP (DT the)  (JJ mature)  (NN T-cell)  (NN phenotype) )))) (PP (IN by)  (S (NP-SBJ (-NONE- *-70) ) (VP (VBG functioning)  (PP (IN as)  (`` ')  (NP (NP (JJ master)  (NNS regulators) ) (PP (IN of)  (NP (NN T-cell)  (NN differentiation) ))) ('' ') ))))) (. .) ) )
+(TOP (S (NP-SBJ (NP (DT The)  (NN conversion) ) (PP (IN of)  (NP (DT the)  (NN TCEd) )) (PP (TO to)  (NP (DT a)  (`` ')  (JJ perfect)  ('' ')  (NN NF-kB)  (NN binding)  (NN site) ))) (VP-COOD (VP (VBZ leads)  (PP (TO to)  (NP-19 (NP (DT a)  (JJR tighter)  (NN binding) ) (PP (IN of)  (NP (NN NF-kB) )) (PP (TO to)  (NP (NN TCEd)  (NN DNA) ))))) (CC and)  (, ,)  (VP (PP (IN as)  (NP (DT a)  (JJ functional)  (NN consequence) )) (, ,)  (PP (TO to)  (NP=19 (NP (DT the)  (NN activity) ) (PP (IN of)  (NP (DT the)  (`` ')  (VBN converted)  ('' ')  (NN TCEd)  (NNS motifs) )) (PP (IN in)  (NP (NN HeLa)  (NNS cells) )))))) (. .) ) )
diff --git a/parsing/EVALB/bug/bug.rsl-new b/parsing/EVALB/bug/bug.rsl-new
new file mode 100644
index 0000000000000000000000000000000000000000..4b143a2a283e278ec844cbf1a5b37269cb2d7387
--- /dev/null
+++ b/parsing/EVALB/bug/bug.rsl-new
@@ -0,0 +1,39 @@
+Sent.                        Matched  Bracket   Cross        Correct Tag
+ID  Len.  Stat. Recal  Prec.  Bracket gold test Bracket Words  Tags Accracy
+============================================================================
+1   37    0   77.27  65.38    17     22   26      5     34    27    79.41
+2   21    0   69.23  64.29     9     13   14      2     20    16    80.00
+3   47    0   80.00  82.35    28     35   34      4     44    40    90.91
+4   26    0   35.29  37.50     6     17   16      8     25    18    72.00
+5   44    0   42.31  33.33    11     26   33     17     38    28    73.68
+============================================================================
+              62.83  57.72    71   113   123      0    161   129    80.12
+=== Summary ===
+
+-- All --
+Number of sentence        =      5
+Number of Error sentence  =      0
+Number of Skip  sentence  =      0
+Number of Valid sentence  =      5
+Bracketing Recall         =  62.83
+Bracketing Precision      =  57.72
+Bracketing FMeasure       =  60.17
+Complete match            =   0.00
+Average crossing          =   7.20
+No crossing               =   0.00
+2 or less crossing        =  20.00
+Tagging accuracy          =  80.12
+
+-- len<=40 --
+Number of sentence        =      3
+Number of Error sentence  =      0
+Number of Skip  sentence  =      0
+Number of Valid sentence  =      3
+Bracketing Recall         =  61.54
+Bracketing Precision      =  57.14
+Bracketing FMeasure       =  59.26
+Complete match            =   0.00
+Average crossing          =   5.00
+No crossing               =   0.00
+2 or less crossing        =  33.33
+Tagging accuracy          =  77.22
diff --git a/parsing/EVALB/bug/bug.rsl-old b/parsing/EVALB/bug/bug.rsl-old
new file mode 100644
index 0000000000000000000000000000000000000000..3f10bc04a014f090ac80690e96daebb576c61931
--- /dev/null
+++ b/parsing/EVALB/bug/bug.rsl-old
@@ -0,0 +1,45 @@
+Sent.                        Matched  Bracket   Cross        Correct Tag
+ID  Len.  Stat. Recal  Prec.  Bracket gold test Bracket Words  Tags Accracy
+============================================================================
+1 : Length unmatch (33|35)
+   1   37    1    0.00   0.00     0      0    0      0      0     0     0.00
+2 : Length unmatch (19|21)
+   2   21    1    0.00   0.00     0      0    0      0      0     0     0.00
+3 : Length unmatch (44|45)
+   3   47    1    0.00   0.00     0      0    0      0      0     0     0.00
+4 : Length unmatch (24|26)
+   4   26    1    0.00   0.00     0      0    0      0      0     0     0.00
+5 : Length unmatch (38|39)
+   5   44    1    0.00   0.00     0      0    0      0      0     0     0.00
+============================================================================
+      0     0     0.00
+
+=== Summary ===
+
+-- All --
+Number of sentence        =      5
+Number of Error sentence  =      5
+Number of Skip  sentence  =      0
+Number of Valid sentence  =      0
+Bracketing Recall         =   0.00
+Bracketing Precision      =   0.00
+Bracketing FMeasure       =    nan
+Complete match            =   0.00
+Average crossing          =   0.00
+No crossing               =   0.00
+2 or less crossing        =   0.00
+Tagging accuracy          =   0.00
+
+-- len<=40 --
+Number of sentence        =      3
+Number of Error sentence  =      3
+Number of Skip  sentence  =      0
+Number of Valid sentence  =      0
+Bracketing Recall         =   0.00
+Bracketing Precision      =   0.00
+Bracketing FMeasure       =    nan
+Complete match            =   0.00
+Average crossing          =   0.00
+No crossing               =   0.00
+2 or less crossing        =   0.00
+Tagging accuracy          =   0.00
diff --git a/parsing/EVALB/bug/bug.tst b/parsing/EVALB/bug/bug.tst
new file mode 100644
index 0000000000000000000000000000000000000000..d6b51942b4bbc45c2ec029b713cfcbc5c0117e38
--- /dev/null
+++ b/parsing/EVALB/bug/bug.tst
@@ -0,0 +1,5 @@
+(S1 (S (NP (DT The) (JJ Thy-1) (NN gene) (NN promoter)) (VP (VP (VBZ resembles) (NP (NP (DT a) (ADJP (CD ") (NN housekeeping)) (NN ") (NN promoter)) (SBAR (WHPP (IN in) (WHNP (WDT that))) (S (NP (PRP it)) (VP (VBZ is) (VP (VBN located) (PP (IN within) (NP (DT a) (JJ methylation-free) (NN island))))))))) (, ,) (VP (VBZ lacks) (NP (DT a) (JJ canonical) (NNP TATA) (NN box))) (, ,) (CC and) (VP (VBZ displays) (NP (NP (NN heterogeneity)) (PP (IN in) (NP (NP (DT the) (JJ 5'-end) (NNS termini)) (PP (IN of) (NP (DT the) (NN mRNA)))))))) (. .)))
+(S1 (S (NP (NP (DT The) (JJ latter) (CD ") (JJ nuclear) (NN factor)) (PP (IN for) (NP (VBN activated) (NN T) (NNS cells)))) (VP (VBZ ") (ADJP (JJ likely) (S (VP (VBZ contributes) (PP (TO to) (NP (NP (DT the) (NN tissue) (NN specificity)) (PP (IN of) (NP (JJ IL-2) (NN gene) (NN expression))))))))) (. .)))
+(S1 (S (ADVP (RB Thus)) (, ,) (NP (PRP we)) (VP (VBD postulated) (SBAR (SBAR (IN that) (S (NP (NP (DT the) (JJ circadian) (NN modification)) (PP (IN of) (NP (NNP GR)))) (VP (VBD was) (ADJP (JJ independent) (PP (IN of) (NP (DT the) (JJ diurnal) (NNS fluctuations)))) (PP (IN in) (NP (NP (NN plasma) (JJ cortisol) (NN level)) (CC or) (NP (NP (DT the) (JJ circadian) (NNS variations)) (PP (IN in) (NP (JJ environmental) (NN lighting))))))))) (CC and) (SBAR (IN that) (S (NP (DT the) (NN rhythmicity)) (VP (MD might) (VP (VB be) (VP (VBN regulated) (PP (IN by) (NP (DT the) ('' ') (NP (JJ circadian) (NN pacemaker) (POS ')) (VP (VBN located) (PP (IN in) (NP (DT the) (JJ human) (JJ basal) (NN brain))))))))))))) (. .)))
+(S1 (S (NP (JJ Such) (NN transcription) (NNS factors)) (VP (VBP play) (NP (NP (DT a) (JJ key) (NN role)) (PP (IN in) (NP (NP (DT the) (NN development)) (PP (IN of) (NP (NP (DT the) (JJ mature) (JJ T-cell) (NN phenotype)) (PP (IN by) (NP (NP (NN functioning) (RB as) (POS ')) (NN master) (NNS regulators))))) (PP (IN of) (NP (JJ T-cell) (NN differentiation) (POS '))))))) (. .)))
+(S1 (S (NP (NP (DT The) (NN conversion)) (PP (IN of) (NP (DT the)))) (VP (VBD TCEd) (PP (TO to) (NP (NP (DT a) ('' ') (JJ perfect) ('' ') (NN NF-kB)) (SBAR (S (NP (JJ binding) (NN site)) (VP (VBZ leads) (PP (TO to) (NP (NP (NP (DT a) (ADJP (RBR tighter) (JJ binding)) (PP (IN of) (NP (NP (NNS NF-kB)) (PP (PP (TO to) (NP (JJ TCEd) (NN DNA))) (CC and) (PP (, ,) (PP (IN as) (NP (DT a) (JJ functional) (NN consequence))) (, ,) (TO to) (NP (NP (DT the) (NN activity)) (PP (IN of) (NP (DT the)))))))) (POS ')) (JJ converted) ('' ') (JJ TCEd) (NNS motifs)) (PP (IN in) (NP (NNP HeLa) (NNS cells))))))))))) (. .)))
diff --git a/parsing/EVALB/evalb b/parsing/EVALB/evalb
new file mode 100755
index 0000000000000000000000000000000000000000..908d298243a797a7c666252ac79fd646fd5b34c5
Binary files /dev/null and b/parsing/EVALB/evalb differ
diff --git a/parsing/EVALB/evalb.c b/parsing/EVALB/evalb.c
new file mode 100644
index 0000000000000000000000000000000000000000..9a3be2de2df01868fc1bbb6993bcee94766825b1
--- /dev/null
+++ b/parsing/EVALB/evalb.c
@@ -0,0 +1,1537 @@
+/*****************************************************************/
+/* evalb [-p param_file] [-dh] [-e n] gold-file test-file        */
+/*                                                               */
+/*        Evaluate bracketing in test-file against gold-file.    */
+/*        Return recall, precision, tagging accuracy.            */
+/*                                                               */
+/*   <option>                                                    */
+/*        -p param_file  parameter file                          */
+/*        -d             debug mode                              */
+/*        -e n           number of error to kill (default=10)    */
+/*        -h             help                                    */
+/*                                                               */
+/*                                         Satoshi Sekine (NYU)  */
+/*                                         Mike Collins (UPenn)  */
+/*                                                               */
+/*                                         October.1997          */
+/*                                                               */
+/* Please refer README for the update information                */
+/*****************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h> //### added for exit, atoi decls
+#include <ctype.h>
+#include <string.h>
+
+
+/* Internal Data format -------------------------------------------*/
+/*                                                                 */
+/* (S (NP (NNX this)) (VP (VBX is) (NP (DT a) (NNX pen))) (SYM .)) */
+/*                                                                 */
+/*   wn=5                                                          */
+/*                        word    label                            */
+/*   terminal[0] =        this     NNX                             */
+/*   terminal[1] =        is       VBX                             */
+/*   terminal[2] =        a        DT                              */
+/*   terminal[3] =        pen      NNX                             */
+/*   terminal[4] =        .        SYM                             */
+/*                                                                 */
+/*   bn=4                                                          */
+/*                      start     end      label                   */
+/*   bracket[0]  =        0        5         S                     */
+/*   bracket[1]  =        0        1         NP                    */
+/*   bracket[2]  =        1        4         VP                    */
+/*   bracket[3]  =        2        4         NP                    */
+/*                                                                 */
+/*              matched bracketing                                 */
+/*   Recall = ---------------------------                          */
+/*             # of bracket in ref-data                            */
+/*                                                                 */
+/*              matched bracketing                                 */
+/*   Precision = ------------------------                          */
+/*             # of bracket in test-data                           */
+/*                                                                 */
+/*-----------------------------------------------------------------*/
+
+/******************/
+/* constant macro */
+/******************/
+
+/* Capacity limits for the fixed-size buffers and tables declared below. */
+/* NOTE(review): MAX_SENT_LEN is not referenced in the visible code;     */
+/* main() sizes its line buffers with a literal 5000 -- confirm.         */
+#define MAX_SENT_LEN           5000
+#define MAX_WORD_IN_SENT        200  /* entries in terminal1[] / terminal2[] */
+#define MAX_BRACKET_IN_SENT     200  /* entries in bracket1[] / bracket2[], stack[], ends[] */
+#define MAX_WORD_LEN            100  /* bytes in s_terminal.word */
+#define MAX_LABEL_LEN            30  /* bytes in a terminal or bracket label */
+#define MAX_QUOTE_TERM           20  /* entries in quotterm1[] / quotterm2[] and Quote_term[] */
+
+/* Capacity of the tables filled from the parameter file. */
+#define MAX_DELETE_LABEL        100  /* Delete_label[] and Delete_label_for_length[] */
+#define MAX_EQ_LABEL            100  /* EQ_label[] */
+#define MAX_EQ_WORD             100  /* EQ_word[] */
+
+#define MAX_LINE_LEN            500  /* line buffer used by read_parameter_file() */
+
+/* Initial values of Max_error and TOT_cut_len. */
+#define DEFAULT_MAX_ERROR        10
+#define DEFAULT_CUT_LEN          40
+
+/*************/
+/* structure */
+/*************/
+
+/* One terminal (word) of a sentence together with its pre-terminal label. */
+typedef struct ss_terminal {
+    char word[MAX_WORD_LEN];
+    char label[MAX_LABEL_LEN];
+    int  result;                /* 0:unmatch, 1:match, 9:undef */
+} s_terminal;
+
+/* A "quote" terminal remembered by read_line() so that fix_quote() and   */
+/* reinsert_term() can put it back into the terminal list later.          */
+typedef struct ss_term_ind {
+	s_terminal term;
+	int index;                    /* word ID (wid) at the time the term was read; -1 when unused */
+   int bracket;                  /* bracket ID (bid) at the time the term was read; -1 when unused */
+   int endslen;                  /* number of valid entries in ends[] (stack_top at read time) */
+   int ends[MAX_BRACKET_IN_SENT]; /* copy of the open-bracket stack at read time */
+} s_term_ind;
+
+/* One non-terminal bracket: its word span and its position in the input line. */
+typedef struct ss_bracket {
+    int start;                   /* word ID at the open bracket; -1 when unused */
+    int end;                     /* word ID at the close bracket (exclusive end) */
+    unsigned int buf_start;      /* offset into the line buffer recorded at the open bracket */
+    unsigned int buf_end;        /* offset into the line buffer of the close bracket */
+    char label[MAX_LABEL_LEN];
+    int  result;                 /* 0: unmatch, 1:match, 5:delete 9:undef */
+} s_bracket;
+
+
+/* A pair of strings (labels or words) treated as equivalent. */
+typedef struct ss_equiv {
+    char *s1;
+    char *s2;
+} s_equiv;
+
+
+/****************************/
+/* global variables         */
+/*   gold-data: suffix = 1  */
+/*   test-data: suffix = 2  */
+/****************************/
+
+/*---------------*/
+/* Sentence data */
+/*---------------*/
+int wn1, wn2;                              /* number of words in sentence  */
+int r_wn1;                                 /* number of words in sentence  */
+                                           /* which only ignores labels in */
+                                           /* DELETE_LABEL_FOR_LENGTH      */
+
+s_terminal terminal1[MAX_WORD_IN_SENT];    /* terminal information */
+s_terminal terminal2[MAX_WORD_IN_SENT];
+
+s_term_ind quotterm1[MAX_QUOTE_TERM];      /* special terminals ("'","POS") */
+s_term_ind quotterm2[MAX_QUOTE_TERM];
+
+int bn1, bn2;                              /* number of brackets */
+
+int r_bn1, r_bn2;                          /* number of brackets */
+                                           /* after deletion */
+
+s_bracket bracket1[MAX_BRACKET_IN_SENT];   /* bracket information */
+s_bracket bracket2[MAX_BRACKET_IN_SENT];
+
+
+/*------------*/
+/* Total data */
+/*------------*/
+int TOTAL_bn1, TOTAL_bn2, TOTAL_match;     /* total number of brackets */
+int TOTAL_sent;                            /* No. of sentence */
+int TOTAL_error_sent;                      /* No. of error sentence */
+int TOTAL_skip_sent;                       /* No. of skip sentence */
+int TOTAL_comp_sent;                       /* No. of complete match sent */
+int TOTAL_word;                            /* total number of word */
+int TOTAL_crossing;                        /* total crossing */
+int TOTAL_no_crossing;                     /* no crossing sentence */
+int TOTAL_2L_crossing;                     /* 2 or less crossing sentence */
+int TOTAL_correct_tag;                     /* total correct tagging */
+
+int TOT_cut_len = DEFAULT_CUT_LEN;         /* Cut-off length in statistics */
+
+                                 /* data for sentences with len <= CUT_LEN */
+                                 /* Historically it was 40.                */
+int TOT40_bn1, TOT40_bn2, TOT40_match;     /* total number of brackets */
+int TOT40_sent;                            /* No. of sentence */
+int TOT40_error_sent;                      /* No. of error sentence */
+int TOT40_skip_sent;                       /* No. of skip sentence */
+int TOT40_comp_sent;                       /* No. of complete match sent */
+int TOT40_word;                            /* total number of word */
+int TOT40_crossing;                        /* total crossing */
+int TOT40_no_crossing;                     /* no crossing sentence */
+int TOT40_2L_crossing;                     /* 2 or less crossing sentence */
+int TOT40_correct_tag;                     /* total correct tagging */
+
+/*---------------*/
+/* miscellaneous */
+/*---------------*/
+int Line;                                  /* line number */
+int Error_count = 0;                       /* Error count */
+int Status;                                /* Result status for each sent */
+                                           /*    0: OK, 1: error, 2: skip */
+
+/*--------------------*/
+/* stack manipulation */
+/*--------------------*/
+int stack_top;
+int stack[MAX_BRACKET_IN_SENT];
+
+/************************************************************/
+/* User parameters which can be specified in parameter file */
+/************************************************************/
+
+/*------------------------------------------*/
+/* Debug mode                               */
+/*   print out data for individual sentence */
+/*------------------------------------------*/
+int DEBUG=0;
+
+/*------------------------------------------*/
+/* MAX error                                */
+/*    Number of error to stop the process.  */
+/*    This is useful if there could be      */
+/*    tokenization error.                   */
+/*    The process will stop when this number*/
+/*    of errors are accumulated.            */
+/*------------------------------------------*/
+int Max_error = DEFAULT_MAX_ERROR;
+
+/*------------------------------------------*/
+/* Cut-off length for statistics            */
+/*    int TOT_cut_len = DEFAULT_CUT_LEN;    */
+/*    (Defined above)                       */
+/*------------------------------------------*/
+
+
+/*------------------------------------------*/
+/* unlabeled or labeled bracketing          */
+/*    0: unlabeled bracketing               */
+/*    1: labeled bracketing                 */
+/*------------------------------------------*/
+int F_label    = 1;                 
+
+/*------------------------------------------*/
+/* Delete labels                            */
+/*    list of labels to be ignored.         */
+/*    If it is a pre-terminal label, delete */
+/*    the word along with the brackets.     */
+/*    If it is a non-terminal label, just   */
+/*    delete the brackets (don't delete     */
+/*    children).                            */
+/*------------------------------------------*/
+char *Delete_label[MAX_DELETE_LABEL];
+int Delete_label_n = 0;
+
+/*------------------------------------------*/
+/* Delete labels for length calculation     */
+/*    list of labels to be ignored for      */
+/*    length calculation purpose            */
+/*------------------------------------------*/
+char *Delete_label_for_length[MAX_DELETE_LABEL];
+int Delete_label_for_length_n = 0;
+
+/*------------------------------------------*/
+/* Labels to be considered for misquote     */
+/*    (could be possessive or quote)        */
+/*------------------------------------------*/
+char *Quote_term[MAX_QUOTE_TERM];
+int Quote_term_n = 0;
+
+/*------------------------------------------*/
+/* Equivalent labels, words                 */
+/*     the pairs are considered equivalent  */
+/*     This is non-directional.             */
+/*------------------------------------------*/
+s_equiv EQ_label[MAX_EQ_LABEL];
+int EQ_label_n = 0;
+
+s_equiv EQ_word[MAX_EQ_WORD];
+int EQ_word_n = 0;
+
+
+
+/************************/
+/* Function return-type */
+/************************/
+int main();
+void init_global();
+void print_head();
+void init();
+void read_parameter_file();
+void set_param();
+int narg();
+int read_line();
+
+void pushb();
+int popb();
+int stackempty();
+
+void calc_result(unsigned char *buf1,unsigned char *buf);
+void fix_quote();
+void reinsert_term();
+void massage_data();
+void modify_label();
+void individual_result();
+void print_total();
+void dsp_info();
+int is_terminator();
+int is_deletelabel();
+int is_deletelabel_for_length();
+int is_quote_term();
+int word_comp();
+int label_comp();
+
+void Error();
+void Fatal();
+void Usage();
+
+/* ### provided by std headers 
+int fprintf();
+int printf();
+int atoi();
+int fclose();
+int sscanf();
+*/
+
+/***********/
+/* program */
+/***********/
+#define ARG_CHECK(st) if(!(*++(*argv) || (--argc && *++argv))){ \
+			 fprintf(stderr,"Missing argument: %s\n",st); \
+		      }
+
+/*------------------------------------------------------------------*/
+/* Program entry point.                                             */
+/*                                                                  */
+/* Parses the command-line options, opens the gold file (first      */
+/* positional argument) and the test file (second positional        */
+/* argument), evaluates them line by line with read_line() and      */
+/* calc_result(), and finally prints the totals.                    */
+/*                                                                  */
+/* Returns 0 after a complete run.  Prints the usage text and exits */
+/* with status 1 when either file name is missing, instead of       */
+/* handing a NULL path to fopen().                                  */
+/*------------------------------------------------------------------*/
+int
+main(argc,argv)
+int argc;
+char *argv[];
+{
+    char *filename1, *filename2;
+    FILE *fd1, *fd2;
+    unsigned char buff[5000];
+    unsigned char buff1[5000];
+
+    filename1=NULL;
+    filename2=NULL;
+
+    for(argc--,argv++;argc>0;argc--,argv++){
+        if(**argv == '-'){
+            while(*++(*argv)){
+                switch(**argv){
+
+                  case 'h':    /* help */
+                    Usage();
+                    exit(1);
+
+                  case 'd':      /* debug mode */
+                    DEBUG = 1;
+                    goto nextarg;
+
+                  case 'D':      /* verbose debug mode */
+                    DEBUG = 2;
+                    goto nextarg;
+
+                  case 'c':      /* cut-off length */
+                    ARG_CHECK("cut-off length for statistices");
+                    TOT_cut_len = atoi(*argv);
+                    goto nextarg;
+
+                  case 'e':      /* max error */
+                    ARG_CHECK("number of error to kill");
+                    Max_error = atoi(*argv);
+                    goto nextarg;
+
+                  case 'p':      /* parameter file */
+                    ARG_CHECK("parameter file");
+                    read_parameter_file(*argv);
+                    goto nextarg;
+
+                  default:
+                    Usage();
+                    exit(0);
+                }
+            }
+        } else {
+            /* positional arguments: gold file first, then test file */
+            if(filename1==NULL){
+                filename1 = *argv;
+            }else if(filename2==NULL){
+                filename2 = *argv;
+            }
+        }
+      nextarg: continue;
+    }
+
+    /* Both files are mandatory; fopen(NULL, ...) is undefined behaviour. */
+    if(filename1==NULL || filename2==NULL){
+        Usage();
+        exit(1);
+    }
+
+    init_global();
+
+    if((fd1 = fopen(filename1,"r"))==NULL){
+        Fatal("Can't open gold file (%s)\n",filename1);
+    }
+    if((fd2 = fopen(filename2,"r"))==NULL){
+        Fatal("Can't open test file (%s)\n",filename2);
+    }
+
+    print_head();
+
+    for(Line=1;fgets((char *)buff,(int)sizeof(buff),fd1)!=NULL;Line++){
+
+        init();
+
+        /* READ 1: gold sentence */
+        r_wn1 = read_line(buff,terminal1,quotterm1,&wn1,bracket1,&bn1);
+
+        /* keep the gold line; buff is reused for the test line */
+        strcpy((char *)buff1,(char *)buff);
+
+        /* READ 2: test sentence */
+        if(fgets((char *)buff,(int)sizeof(buff),fd2)==NULL){
+            Error("Number of lines unmatch (too many lines in gold file)\n");
+            break;
+        }
+
+        read_line(buff,terminal2,quotterm2,&wn2,bracket2,&bn2);
+
+        /* Calculate result and print it */
+        calc_result(buff1,buff);
+
+        if(DEBUG>=1){
+            dsp_info();
+        }
+    }
+
+    if(fgets((char *)buff,(int)sizeof(buff),fd2)!=NULL){
+        Error("Number of lines unmatch (too many lines in test file)\n");
+    }
+
+    print_total();
+
+    fclose(fd1);
+    fclose(fd2);
+
+    return (0);
+}
+
+
+/*--------------------------------------------------*/
+/* Reset every corpus-wide counter to zero: the     */
+/* TOTAL_* set and the TOT40_* (cut-off length) set */
+/*--------------------------------------------------*/
+void
+init_global()
+{
+    /* counters over all sentences */
+    TOTAL_bn1         = 0;
+    TOTAL_bn2         = 0;
+    TOTAL_match       = 0;
+    TOTAL_sent        = 0;
+    TOTAL_error_sent  = 0;
+    TOTAL_skip_sent   = 0;
+    TOTAL_comp_sent   = 0;
+    TOTAL_word        = 0;
+    TOTAL_correct_tag = 0;
+    TOTAL_crossing    = 0;
+    TOTAL_no_crossing = 0;
+    TOTAL_2L_crossing = 0;
+
+    /* counters over sentences within the cut-off length */
+    TOT40_bn1         = 0;
+    TOT40_bn2         = 0;
+    TOT40_match       = 0;
+    TOT40_sent        = 0;
+    TOT40_error_sent  = 0;
+    TOT40_skip_sent   = 0;
+    TOT40_comp_sent   = 0;
+    TOT40_word        = 0;
+    TOT40_correct_tag = 0;
+    TOT40_crossing    = 0;
+    TOT40_no_crossing = 0;
+    TOT40_2L_crossing = 0;
+}
+
+
+/*-----------------------------------------------*/
+/* Write the three-line column header of the     */
+/* per-sentence result table to standard output. */
+/*-----------------------------------------------*/
+void
+print_head()
+{
+    static const char *const header[] = {
+        "  Sent.                        Matched  Bracket   Cross        Correct Tag\n",
+        " ID  Len.  Stat. Recal  Prec.  Bracket gold test Bracket Words  Tags Accracy\n",
+        "============================================================================\n"
+    };
+    size_t row;
+
+    for(row=0;row<sizeof(header)/sizeof(header[0]);row++){
+        fputs(header[row],stdout);
+    }
+}
+
+
+/*--------------------------------------------------*/
+/* Per-sentence reset: clears the word and bracket  */
+/* counts, marks every terminal, quote-term and     */
+/* bracket slot as unused, and sets Status to 0.    */
+/*--------------------------------------------------*/
+void
+init()
+{
+  s_terminal *t1, *t2;
+  s_term_ind *q1, *q2;
+  s_bracket  *b1, *b2;
+  int k;
+
+  wn1 = wn2 = 0;
+  bn1 = bn2 = 0;
+  r_bn1 = r_bn2 = 0;
+
+  /* terminals: empty strings, result "undef" */
+  for(k=0;k<MAX_WORD_IN_SENT;k++){
+      t1 = &terminal1[k];
+      t2 = &terminal2[k];
+      t1->word[0]  = '\0';
+      t1->label[0] = '\0';
+      t1->result   = 9;
+      t2->word[0]  = '\0';
+      t2->label[0] = '\0';
+      t2->result   = 9;
+  }
+
+  /* quote terms: same as terminals, plus index/bracket set to -1 */
+  for(k=0;k<MAX_QUOTE_TERM;k++){
+      q1 = &quotterm1[k];
+      q2 = &quotterm2[k];
+      q1->term.word[0]  = '\0';
+      q1->term.label[0] = '\0';
+      q1->term.result   = 9;
+      q1->index         = -1;
+      q1->bracket       = -1;
+      q2->term.word[0]  = '\0';
+      q2->term.label[0] = '\0';
+      q2->term.result   = 9;
+      q2->index         = -1;
+      q2->bracket       = -1;
+  }
+
+  /* brackets: span -1..-1, empty label, result "undef" */
+  for(k=0;k<MAX_BRACKET_IN_SENT;k++){
+      b1 = &bracket1[k];
+      b2 = &bracket2[k];
+      b1->start    = -1;
+      b1->end      = -1;
+      b1->label[0] = '\0';
+      b1->result   = 9;
+      b2->start    = -1;
+      b2->end      = -1;
+      b2->label[0] = '\0';
+      b2->result   = 9;
+  }
+
+  Status = 0;
+}
+
+/*------------------------------------------------------------*/
+/* parameter file                                             */
+/*                                                            */
+/* Reads filename line by line.  Lines starting with '#' and  */
+/* lines shorter than three characters are ignored.  Every    */
+/* other line is split at the first run of whitespace into a  */
+/* keyword and a value and handed to set_param(); the keyword */
+/* pointer is the start of the line, so set_param() sees the  */
+/* whole line there.  A line with a keyword but no value is   */
+/* reported on stderr and skipped.                            */
+/*------------------------------------------------------------*/
+void
+read_parameter_file(filename)
+char *filename;
+{
+    char buff[MAX_LINE_LEN];
+    FILE *fd;
+    int line;
+    int i;
+
+    if((fd=fopen(filename,"r"))==NULL){
+        Fatal("Can't open parameter file (%s)\n",filename);
+        return;                 /* only reached if Fatal() returns */
+    }
+
+    for(line=1;fgets(buff,MAX_LINE_LEN,fd)!=NULL;line++){
+
+        /* clean up the tail (index 0 is never cleared) */
+        /*----------------------------------------------*/
+        for(i=(int)strlen(buff)-1;i>0 && isspace((unsigned char)buff[i]);i--){
+            buff[i]='\0';
+        }
+        if(buff[0]=='#' ||      /* comment-line */
+           strlen(buff)<3){     /* too short, just ignore */
+            continue;
+        }
+
+        /* locate the value: skip the keyword, then the separator.    */
+        /* The keyword scan also stops at the terminating NUL so that */
+        /* a line without any whitespace cannot run off the buffer.   */
+        /*------------------------------------------------------------*/
+        for(i=0;buff[i]!='\0' && !isspace((unsigned char)buff[i]);i++);
+        for(;isspace((unsigned char)buff[i]);i++);
+        if(buff[i]=='\0'){
+            fprintf(stderr,"Empty value in parameter file (%d)\n",line);
+            continue;
+        }
+
+        /* set parameter and value */
+        /*-------------------------*/
+        set_param(buff,buff+i);
+    }
+
+    fclose(fd);
+}
+
+
+/* True when param starts with the keyword s and the keyword is followed */
+/* by whitespace or the end of the string.                               */
+#define STRNCMP(s) (strncmp(param,s,strlen(s))==0 &&  \
+                    (param[strlen(s)]=='\0' || \
+                     isspace((unsigned char)param[strlen(s)])))
+
+/* Return a heap copy of s; reports through Fatal() when out of memory. */
+static char *
+copy_param_string(s)
+char *s;
+{
+    char *copy = (char *)malloc(strlen(s)+1);
+
+    if(copy==NULL){
+        Fatal("Out of memory while reading parameter file\n");
+        exit(1);                /* only reached if Fatal() returns */
+    }
+    strcpy(copy,s);
+    return(copy);
+}
+
+/* Split value into its two whitespace-separated tokens and store heap   */
+/* copies in eq.  Returns 1 on success, 0 when two tokens are not found. */
+static int
+store_equiv_pair(value,eq)
+char *value;
+s_equiv *eq;
+{
+    /* Each token is at most strlen(value) long, so these cannot overflow. */
+    char *first  = copy_param_string(value);
+    char *second = copy_param_string(value);
+
+    if(sscanf(value,"%s %s",first,second)!=2){
+        free(first);
+        free(second);
+        return(0);
+    }
+    eq->s1 = first;
+    eq->s2 = second;
+    return(1);
+}
+
+/*--------------------------------------------------------------*/
+/* Apply one parameter-file entry.                              */
+/*   param : text starting with the keyword                     */
+/*   value : text starting with the value                       */
+/* Numeric keywords are converted with atoi(); list keywords    */
+/* append a heap copy of the value to the matching table.       */
+/* Unknown keywords, malformed EQ_* entries and entries beyond  */
+/* a table's capacity are reported on stderr and ignored.       */
+/*--------------------------------------------------------------*/
+void
+set_param(param,value)
+char *param, *value;
+{
+    if(STRNCMP("DEBUG")){
+
+        DEBUG = atoi(value);
+
+    }else if(STRNCMP("MAX_ERROR")){
+
+        Max_error = atoi(value);
+
+    }else if(STRNCMP("CUTOFF_LEN")){
+
+        TOT_cut_len = atoi(value);
+
+    }else if(STRNCMP("LABELED")){
+
+        F_label = atoi(value);
+
+    }else if(STRNCMP("DELETE_LABEL")){
+
+        if(Delete_label_n>=MAX_DELETE_LABEL){
+            fprintf(stderr,"Too many DELETE_LABEL entries (max %d)\n",
+                    MAX_DELETE_LABEL);
+            return;
+        }
+        Delete_label[Delete_label_n++] = copy_param_string(value);
+
+    }else if(STRNCMP("DELETE_LABEL_FOR_LENGTH")){
+
+        if(Delete_label_for_length_n>=MAX_DELETE_LABEL){
+            fprintf(stderr,"Too many DELETE_LABEL_FOR_LENGTH entries (max %d)\n",
+                    MAX_DELETE_LABEL);
+            return;
+        }
+        Delete_label_for_length[Delete_label_for_length_n++] =
+            copy_param_string(value);
+
+    }else if(STRNCMP("QUOTE_LABEL")){
+
+        if(Quote_term_n>=MAX_QUOTE_TERM){
+            fprintf(stderr,"Too many QUOTE_LABEL entries (max %d)\n",
+                    MAX_QUOTE_TERM);
+            return;
+        }
+        Quote_term[Quote_term_n++] = copy_param_string(value);
+
+    }else if(STRNCMP("EQ_LABEL")){
+
+        if(narg(value)!=2){
+            fprintf(stderr,"EQ_LABEL requires two values\n");
+            return;
+        }
+        if(EQ_label_n>=MAX_EQ_LABEL){
+            fprintf(stderr,"Too many EQ_LABEL entries (max %d)\n",
+                    MAX_EQ_LABEL);
+            return;
+        }
+        if(store_equiv_pair(value,&EQ_label[EQ_label_n])){
+            EQ_label_n++;
+        }
+
+    }else if(STRNCMP("EQ_WORD")){
+
+        if(narg(value)!=2){
+            fprintf(stderr,"EQ_WORD requires two values\n");
+            return;
+        }
+        if(EQ_word_n>=MAX_EQ_WORD){
+            fprintf(stderr,"Too many EQ_WORD entries (max %d)\n",
+                    MAX_EQ_WORD);
+            return;
+        }
+        if(store_equiv_pair(value,&EQ_word[EQ_word_n])){
+            EQ_word_n++;
+        }
+
+    }else{
+
+        fprintf(stderr,"Unknown keyword (%s) in parameter file\n",param);
+
+    }
+}
+
+
+/* Count the whitespace-separated tokens in the NUL-terminated string s. */
+int
+narg(s)
+char *s;
+{
+    int count = 0;
+    int in_token = 0;       /* 1 while the scan is inside a token */
+
+    for(;*s!='\0';s++){
+        if(isspace(*s)){
+            in_token = 0;
+        }else if(!in_token){
+            /* first character of a new token */
+            in_token = 1;
+            count++;
+        }
+    }
+
+    return(count);
+}
+
+/*-----------------------------*/
+/* Read line and gather data.  */
+/* Return length of sentence.  */
+/*-----------------------------*/
+/* Parses one bracketed sentence held in buff.                           */
+/*   terminal[] receives the words and labels of the kept terminals,     */
+/*   quotterm[] receives every terminal accepted by is_quote_term(),     */
+/*   bracket[]  receives the non-terminal brackets with their word spans */
+/*              and buffer offsets,                                      */
+/*   *wn / *bn  receive the number of terminals / brackets stored.       */
+/* The return value counts the terminals whose label is not rejected by  */
+/* is_deletelabel_for_length().                                          */
+/* NOTE(review): no bounds checks are visible here for label[], word[],  */
+/* terminal[], bracket[] or quotterm[] -- over-long input would overrun. */
+int
+read_line(buff, terminal, quotterm, wn, bracket, bn)
+char *buff;
+s_terminal terminal[];
+s_term_ind quotterm[];
+int *wn;
+s_bracket bracket[];
+int *bn;
+{
+    char *p, *q, label[MAX_LABEL_LEN], word[MAX_WORD_LEN];
+    int	  qt;		  /* quote term counter */
+    int   wid, bid;       /* word ID, bracket ID */
+    int   n;              /* temporary remembering the position */
+    int   b;              /* temporary remembering bid */
+    int   i;
+    int   len;            /* length of the sentence */
+
+    len = 0;
+    stack_top=0;          /* start with an empty open-bracket stack */
+
+    for(p=buff,qt=0,wid=0,bid=0;*p!='\0';){
+
+	if(isspace(*p)){
+	    p++;
+	    continue;
+
+        /* open bracket */
+        /*--------------*/
+	}else if(*p=='('){
+
+	    n=wid;        /* stored but not read again in this function */
+	    /* collect the label that follows '(' */
+	    for(p++,i=0;!is_terminator(*p);p++,i++){
+		label[i]=*p;
+	    }
+	    label[i]='\0';
+
+	    /* Find terminals */
+	    /* q looks ahead from p; p itself only moves once a terminal is accepted */
+	    q = p;
+	    if(isspace(*q)){
+		for(q++;isspace(*q);q++);
+		for(i=0;!is_terminator(*q);q++,i++){
+		    word[i]=*q;
+		}
+		word[i]='\0';
+
+                /* compute length */
+		/* parses as (!x)==1, i.e. true when is_deletelabel_for_length() returns 0 */
+		if(*q==')' && !is_deletelabel_for_length(label)==1){
+		    len++;
+		}
+      if (DEBUG>1)
+         printf("label=%s, word=%s, wid=%d\n",label,word,wid);
+		/* quote terminal */
+		/* remember the term with its word ID, bracket ID and a snapshot */
+		/* of the currently open brackets                                */
+		if(*q==')' && is_quote_term(label,word)==1){
+			strcpy(quotterm[qt].term.word,word);
+			strcpy(quotterm[qt].term.label,label);
+			quotterm[qt].index = wid;
+         quotterm[qt].bracket = bid;
+         quotterm[qt].endslen = stack_top;
+         //quotterm[qt].ends = (int*)malloc(stack_top*sizeof(int));
+         memcpy(quotterm[qt].ends,stack,stack_top*sizeof(int));
+			qt++;
+		}
+		
+                /* delete terminal */
+		/* skipped entirely: no entry in terminal[], wid unchanged */
+		if(*q==')' && is_deletelabel(label)==1){
+		    p = q+1;
+		    continue;
+
+		/* valid terminal */
+		}else if(*q==')'){
+		    strcpy(terminal[wid].word,word);
+		    strcpy(terminal[wid].label,label);
+		    wid++;
+		    p = q+1;
+		    continue;
+
+                /* error */
+		/* after reporting, control falls through and the label is */
+		/* recorded as a non-terminal bracket below                */
+		}else if(*q!='('){
+		    Error("More than two elements in a bracket\n");
+		}
+	    }
+
+            /* otherwise non-terminal label */
+	    bracket[bid].start = wid;
+	    bracket[bid].buf_start = p-buff;
+	    strcpy(bracket[bid].label,label);
+	    pushb(bid);
+	    bid++;
+
+	/* close bracket */
+        /*---------------*/
+	}else if(*p==')'){
+
+	    /* close the most recently opened bracket at the current word ID */
+	    b = popb();
+	    bracket[b].end = wid;
+	    bracket[b].buf_end = p-buff;
+	    p++;
+
+        /* error */
+        /*-------*/
+	}else{
+
+	    /* NOTE(review): p is not advanced in this branch, so leaving the */
+	    /* loop depends on what Error() does -- confirm.                  */
+	    Error("Reading sentence\n");
+	}
+    }
+
+    if(!stackempty()){
+	Error("Bracketing is unbalanced (too many open bracket)\n");
+    }
+
+    *wn = wid;
+    *bn = bid;
+
+    return(len);
+}
+
+
+/*----------------------*/
+/* stack operation      */
+/* for bracketing pairs */
+/*----------------------*/
+/* Push a bracket ID onto the open-bracket stack. */
+void
+pushb(item)
+int item;
+{
+    stack[stack_top] = item;
+    stack_top += 1;
+}
+
+/* Pop and return the most recently pushed bracket ID.                   */
+/* On an empty stack (more ')' than '(' in the input) the problem is     */
+/* reported through Error(), the stack is left empty and 0 is returned,  */
+/* so the value is still a valid array index for the caller and no       */
+/* element before stack[0] is ever read.                                 */
+int
+popb()
+{
+    if(stack_top <= 0){
+        Error("Bracketing unbalance (too many close bracket)\n");
+        stack_top = 0;
+        return(0);
+    }
+
+    stack_top--;
+    return(stack[stack_top]);
+}
+
+/* Return 1 when the open-bracket stack holds no entries, 0 otherwise. */
+int
+stackempty()
+{
+    return(stack_top==0 ? 1 : 0);
+}
+
+
+/* Debug helper: print "  TAG[start-end]: " followed by the piece of   */
+/* line that the bracket covers.  The text is printed straight from    */
+/* the line buffer with a length limit, so no fixed-size scratch       */
+/* buffer can overflow on long spans.                                  */
+static void
+print_bracket_span(const char *tag, const s_bracket *br,
+                   const unsigned char *line)
+{
+    int l = (int)(br->buf_end - br->buf_start);
+
+    if(l < 0){                  /* offsets not set consistently */
+        l = 0;
+    }
+    printf("  %s[%d-%d]: ",tag,br->start,br->end-1);
+    printf("%.*s\n",l,(const char *)line + br->buf_start);
+}
+
+/*------------------------------------------------------------------*/
+/* calculate result                                                 */
+/*                                                                  */
+/* Compares the gold sentence (terminal1/bracket1, text in buf1)    */
+/* with the test sentence (terminal2/bracket2, text in buf) and     */
+/* reports matched brackets, crossing brackets and tagging accuracy */
+/* through individual_result().  The buffers are only used for the  */
+/* DEBUG>1 trace output.                                            */
+/*                                                                  */
+/* A test sentence without words sets Status to 2; a length or word */
+/* mismatch is reported through Error().  In all three cases zeroes */
+/* are passed to individual_result().                               */
+/*------------------------------------------------------------------*/
+void
+calc_result(unsigned char *buf1,unsigned char *buf)
+{
+    int i, j;
+    int match, crossing, correct_tag;
+
+    int last_i = -1;            /* last gold bracket traced as crossing */
+    int match_found;
+
+    /* match_j[j] is set when test bracket j has the same span as some */
+    /* gold bracket but a different label                              */
+    char match_j[MAX_BRACKET_IN_SENT];
+
+    memset(match_j,0,sizeof(match_j));
+
+    if (DEBUG>1)
+        printf("\n");
+
+    /* Find skip and error */
+    /*---------------------*/
+    if(wn2==0){
+        Status = 2;
+        individual_result(0,0,0,0,0,0);
+        return;
+    }
+
+    if(wn1 != wn2){
+        /* give fix_quote() a chance to reconcile the word counts */
+        fix_quote();
+        if(wn1 != wn2){
+            Error("Length unmatch (%d|%d)\n",wn1,wn2);
+            individual_result(0,0,0,0,0,0);
+            return;
+        }
+    }
+
+    for(i=0;i<wn1;i++){
+        if(word_comp(terminal1[i].word,terminal2[i].word)==0){
+            Error("Words unmatch (%s|%s)\n",terminal1[i].word,
+                                            terminal2[i].word);
+            individual_result(0,0,0,0,0,0);
+            return;
+        }
+    }
+
+    /* massage the data */
+    /*------------------*/
+    massage_data();
+
+    /* matching brackets */
+    /*-------------------*/
+    match = 0;
+    for(i=0;i<bn1;i++){
+        match_found = 0;
+
+        for(j=0;j<bn2;j++){
+
+            if (DEBUG>1)
+                printf("1.res=%d, 2.res=%d, 1.start=%d, 2.start=%d, 1.end=%d, 2.end=%d\n",
+                       bracket1[i].result,bracket2[j].result,
+                       bracket1[i].start,bracket2[j].start,
+                       bracket1[i].end,bracket2[j].end);
+
+            /* same span, gold bracket not deleted, test bracket still unmatched? */
+            if(bracket1[i].result != 5 &&
+               bracket2[j].result == 0 &&
+               bracket1[i].start == bracket2[j].start &&
+               bracket1[i].end == bracket2[j].end) {
+
+                /* (1) do we not care about the label or (2) does the label match? */
+                if (F_label==0 ||
+                    label_comp(bracket1[i].label,bracket2[j].label)==1) {
+                    bracket1[i].result = bracket2[j].result = 1;
+                    match++;
+                    match_found = 1;
+                    break;
+                } else {
+                    if (DEBUG>1) {
+                        print_bracket_span("LABEL",&bracket1[i],buf1);
+                    }
+                    match_found = 1;
+                    match_j[j] = 1;
+                }
+            }
+        }
+
+        if (!match_found && bracket1[i].result != 5 && DEBUG>1) {
+            /* gold bracket with no corresponding test bracket */
+            print_bracket_span("BRACKET",&bracket1[i],buf1);
+        }
+    }
+
+    for(j=0;j<bn2;j++){
+        if (bracket2[j].result==0 && !match_j[j] && DEBUG>1) {
+            /* test bracket with no corresponding gold bracket */
+            print_bracket_span("EXTRA",&bracket2[j],buf);
+        }
+    }
+
+    /* crossing */
+    /*----------*/
+    crossing = 0;
+
+    /* crossing is counted based on the brackets */
+    /* in test rather than gold file (by Mike)   */
+    for(j=0;j<bn2;j++){
+        for(i=0;i<bn1;i++){
+            if(bracket1[i].result != 5 &&
+               bracket2[j].result != 5 &&
+               ((bracket1[i].start < bracket2[j].start &&
+                 bracket1[i].end   > bracket2[j].start &&
+                 bracket1[i].end   < bracket2[j].end) ||
+                (bracket1[i].start > bracket2[j].start &&
+                 bracket1[i].start < bracket2[j].end &&
+                 bracket1[i].end   > bracket2[j].end))){
+
+                /* trace a gold bracket only when it differs from the */
+                /* previously traced one                              */
+                if (i != last_i) {
+                    if (DEBUG>1) {
+                        print_bracket_span("CROSSING",&bracket1[i],buf1);
+                    }
+                    last_i = i;
+                }
+
+                /* each test bracket counts at most once */
+                crossing++;
+                break;
+            }
+        }
+    }
+
+    /* Tagging accuracy */
+    /*------------------*/
+    correct_tag=0;
+    for(i=0;i<wn1;i++){
+        if(label_comp(terminal1[i].label,terminal2[i].label)==1){
+            terminal1[i].result = terminal2[i].result = 1;
+            correct_tag++;
+        } else {
+            terminal1[i].result = terminal2[i].result = 0;
+        }
+    }
+
+    individual_result(wn1,r_bn1,r_bn2,match,crossing,correct_tag);
+}
+
+/* Try to reconcile gold and test sentences whose word counts differ     */
+/* because a quote terminal was dropped on one side only.                */
+/* For every recorded test quote term, the gold quote terms are searched */
+/* for one at the same word index with a different label.  If exactly    */
+/* one of the two labels is a delete label, that term is put back into   */
+/* its terminal list with reinsert_term() and the stored indices of the  */
+/* quote terms from that slot onwards are shifted by one.                */
+void
+fix_quote()
+{
+   int i,j,k;
+   /* debug trace: list every recorded quote term of both sentences */
+   if (DEBUG>1) {
+      for(i=0;i<MAX_QUOTE_TERM;i++){
+         if (quotterm1[i].index!=-1)
+            printf("%d: %s - %s\n",quotterm1[i].index,
+                  quotterm1[i].term.label,
+                  quotterm1[i].term.word);
+         if (quotterm2[i].index!=-1)
+            printf("%d: %s - %s\n",quotterm2[i].index,
+                  quotterm2[i].term.label,
+                  quotterm2[i].term.word);
+      }
+   }
+   for(i=0;i<MAX_QUOTE_TERM;i++) {
+      int ind = quotterm2[i].index;
+      if (ind!=-1) {
+         for(j=0;j<MAX_QUOTE_TERM;j++){
+            /* same word position, different label */
+            if (quotterm1[j].index==ind &&
+                  strcmp(quotterm1[j].term.label,
+                     quotterm2[i].term.label)!=0) {
+               /* gold term was deleted, test term was kept: restore the gold term */
+               if (is_deletelabel(quotterm1[j].term.label) && !is_deletelabel(quotterm2[i].term.label)) {
+                  reinsert_term(&quotterm1[j],terminal1,bracket1,&wn1);
+                  /* shift the indices of slot j and all later used slots */
+                  for (k=j;k<MAX_QUOTE_TERM;k++)
+                     if (quotterm1[k].index!=-1)
+                        quotterm1[k].index++;
+               /* test term was deleted, gold term was kept: restore the test term */
+               } else if (is_deletelabel(quotterm2[i].term.label) && !is_deletelabel(quotterm1[j].term.label)) {
+                  reinsert_term(&quotterm2[i],terminal2,bracket2,&wn2);
+                  for (k=i;k<MAX_QUOTE_TERM;k++)
+                     if (quotterm2[k].index!=-1)
+                        quotterm2[k].index++;
+               }
+            }
+         }
+      } else break;   /* first unused test slot ends the scan */
+   }
+}
+
+/* Put a previously dropped quote terminal back into a sentence.          */
+/*   quot     : the remembered term with its word index, bracket ID and   */
+/*              snapshot of the brackets that were open when it was read  */
+/*   terminal : terminal list to insert into                              */
+/*   bracket  : bracket list whose spans have to follow the insertion     */
+/*   wn       : word count of the sentence, incremented by one            */
+void
+reinsert_term(quot,terminal,bracket,wn)
+s_term_ind* quot;
+s_terminal terminal[];
+s_bracket bracket[];
+int* wn;
+{
+   int ind = quot->index;
+   int bra = quot->bracket;
+   s_terminal* term = &quot->term;
+   int k;
+   /* open a gap at position ind by moving the tail of the array up by one; */
+   /* the last array element is dropped                                     */
+   memmove(&terminal[ind+1],
+         &terminal[ind],
+         sizeof(s_terminal)*(MAX_WORD_IN_SENT-ind-1));
+   strcpy(terminal[ind].label,term->label);
+   strcpy(terminal[ind].word,term->word);
+   (*wn)++;
+   if (DEBUG>1)
+      printf("bra=%d, ind=%d\n",bra,ind);
+   /* brackets numbered bra or higher move one word to the right; */
+   /* the scan stops at the first unused slot (start == -1)       */
+   for(k=0;k<MAX_BRACKET_IN_SENT;k++) {
+      if (bracket[k].start==-1)
+         break;
+      if (DEBUG>1)
+         printf("bracket[%d]={%d,%d}\n",k,bracket[k].start,bracket[k].end);
+      if (k>=bra) {
+         bracket[k].start++;
+         bracket[k].end++;
+      }
+      //if (bracket[k].start<=ind && bracket[k].end>=ind)
+         //bracket[k].end++;
+   }
+   if (DEBUG>1)
+      printf("endslen=%d\n",quot->endslen);
+   /* brackets that were open when the term was read grow by one word */
+   for(k=0;k<quot->endslen;k++) {
+      //printf("ends[%d]=%d",k,quot->ends[k]);
+      bracket[quot->ends[k]].end++;
+   }
+   //free(quot->ends);
+}
+/*
+void
+adjust_end(ind,bra)
+int ind;
+int bra;
+{
+    for(k=0;k<MAX_BRACKET_IN_SENT;k++) {
+      if (bracket[k].start==-1)
+         break;
+      printf("bracket[%d]={%d,%d}\n",k,bracket[k].start,bracket[k].end);
+      if (k>=bra)
+         bracket[k].end++;
+   }
+}
+*/
+/* Prepare one bracket list for matching and return how many brackets  */
+/* remain.  Each bracket gets result 0, or 5 ("deleted") when its span  */
+/* is empty or its trimmed label equals one of the Delete_label         */
+/* entries.                                                             */
+static int
+massage_bracket_list(s_bracket *list, int count)
+{
+    s_bracket *br;
+    int kept = 0;
+    int d;
+
+    for(br=list;br<list+count;br++){
+
+        br->result = 0;
+
+        /* Zero element */
+        if(br->start == br->end){
+            br->result = 5;
+            continue;
+        }
+
+        /* Modify label */
+        modify_label(br->label);
+
+        /* Delete label */
+        for(d=0;d<Delete_label_n;d++){
+            if(label_comp(br->label,Delete_label[d])==1){
+                br->result = 5;
+            }
+        }
+
+        if(br->result != 5){
+            kept++;
+        }
+    }
+
+    return(kept);
+}
+
+/* Normalise the gold and test brackets of the current sentence and   */
+/* store the number of brackets that survive deletion in r_bn1/r_bn2. */
+void
+massage_data()
+{
+    /* for GOLD */
+    r_bn1 = massage_bracket_list(bracket1,bn1);
+
+    /* for TEST */
+    r_bn2 = massage_bracket_list(bracket2,bn2);
+}
+
+
+/*--------------------------------------------*/
+/* Cut the label at its first '-' or '=', in  */
+/* place; a label without either is untouched */
+/*--------------------------------------------*/
+void
+modify_label(label)
+char *label;
+{
+    char *cut = strpbrk(label,"-=");
+
+    if(cut!=NULL){
+        *cut='\0';
+    }
+}
+
+
+/*-----------------------------------------------*/
+/* add individual statistics to TOTAL statistics */
+/*-----------------------------------------------*/
+/* Folds one sentence into the TOTAL_* counters, and into the TOT40_*  */
+/* counters when r_wn1 is within TOT_cut_len, then prints the          */
+/* sentence's row of the result table.  Status 1 counts the sentence   */
+/* as an error, Status 2 as skipped; any other value adds the given    */
+/* numbers to the totals.  The parameters wn1, bn1 and bn2 shadow the  */
+/* globals of the same names; the printed bracket counts come from     */
+/* the globals r_bn1 and r_bn2.                                        */
+void
+individual_result(wn1,bn1,bn2,match,crossing,correct_tag)
+int wn1,bn1,bn2,match,crossing,correct_tag;
+{
+    int complete  = (bn1==bn2 && bn2==match);   /* every bracket matched */
+    int in_cutoff = (r_wn1<=TOT_cut_len);
+
+    /* Statistics for ALL */
+    /*--------------------*/
+    TOTAL_sent++;
+    switch(Status){
+      case 1:
+        TOTAL_error_sent++;
+        break;
+      case 2:
+        TOTAL_skip_sent++;
+        break;
+      default:
+        TOTAL_bn1 += bn1;
+        TOTAL_bn2 += bn2;
+        TOTAL_match += match;
+        if(complete){
+            TOTAL_comp_sent++;
+        }
+        TOTAL_word += wn1;
+        TOTAL_crossing += crossing;
+        if(crossing==0){
+            TOTAL_no_crossing++;
+        }
+        if(crossing <= 2){
+            TOTAL_2L_crossing++;
+        }
+        TOTAL_correct_tag += correct_tag;
+        break;
+    }
+
+    /* Statistics for sent length <= TOT_cut_len */
+    /*-------------------------------------------*/
+    if(in_cutoff){
+        TOT40_sent++;
+        switch(Status){
+          case 1:
+            TOT40_error_sent++;
+            break;
+          case 2:
+            TOT40_skip_sent++;
+            break;
+          default:
+            TOT40_bn1 += bn1;
+            TOT40_bn2 += bn2;
+            TOT40_match += match;
+            if(complete){
+                TOT40_comp_sent++;
+            }
+            TOT40_word += wn1;
+            TOT40_crossing += crossing;
+            if(crossing==0){
+                TOT40_no_crossing++;
+            }
+            if(crossing <= 2){
+                TOT40_2L_crossing++;
+            }
+            TOT40_correct_tag += correct_tag;
+            break;
+        }
+    }
+
+    /* Print individual result */
+    /*-------------------------*/
+    printf("%4d  %3d    %d  ",Line,r_wn1,Status);
+    printf("%6.2f %6.2f   %3d    %3d  %3d    %3d",
+           (r_bn1==0?0.0:100.0*match/r_bn1),
+           (r_bn2==0?0.0:100.0*match/r_bn2),
+           match, r_bn1, r_bn2, crossing);
+
+    printf("   %4d  %4d   %6.2f\n",wn1,correct_tag,
+           (wn1==0?0.0:100.0*correct_tag/wn1));
+}
+
+
+/*------------------------*/
+/* print total statistics */
+/*------------------------*/
+/*
+ * Print the closing totals line and the "=== Summary ===" report on
+ * stdout: once over all sentences (TOTAL_* counters) and once over the
+ * sentences counted in the TOT40_* counters (length <= TOT_cut_len).
+ *
+ * Every ratio is guarded against a zero denominator and reported as
+ * 0.00 in that case.  This includes the F-measure: when precision and
+ * recall are both 0 the unguarded formula 2*p*r/(p+r) is 0/0 and would
+ * print as NaN, so it is reported as 0.00 instead.
+ */
+void
+print_total()
+{
+    int sentn;
+    double r,p,f;
+
+    printf("============================================================================\n");
+
+    /* the bracket columns of the totals line are only printed when both
+       bracket totals are non-zero */
+    if(TOTAL_bn1>0 && TOTAL_bn2>0){
+        printf("                %6.2f %6.2f %6d %5d %5d  %5d",
+               (TOTAL_bn1>0?100.0*TOTAL_match/TOTAL_bn1:0.0),
+               (TOTAL_bn2>0?100.0*TOTAL_match/TOTAL_bn2:0.0),
+               TOTAL_match,
+               TOTAL_bn1,
+               TOTAL_bn2,
+               TOTAL_crossing);
+    }
+
+    printf("  %5d %5d   %6.2f",
+           TOTAL_word,
+           TOTAL_correct_tag,
+           (TOTAL_word>0?100.0*TOTAL_correct_tag/TOTAL_word:0.0));
+
+    printf("\n");
+    printf("=== Summary ===\n");
+
+    /* valid sentences = all - error - skipped */
+    sentn = TOTAL_sent - TOTAL_error_sent - TOTAL_skip_sent;
+
+    printf("\n-- All --\n");
+    printf("Number of sentence        = %6d\n",TOTAL_sent);
+    printf("Number of Error sentence  = %6d\n",TOTAL_error_sent);
+    printf("Number of Skip  sentence  = %6d\n",TOTAL_skip_sent);
+    printf("Number of Valid sentence  = %6d\n",sentn);
+
+    r = TOTAL_bn1>0 ? 100.0*TOTAL_match/TOTAL_bn1 : 0.0;
+    printf("Bracketing Recall         = %6.2f\n",r);
+
+    p = TOTAL_bn2>0 ? 100.0*TOTAL_match/TOTAL_bn2 : 0.0;
+    printf("Bracketing Precision      = %6.2f\n",p);
+
+    /* harmonic mean; guarded so that p == r == 0 gives 0.00, not NaN */
+    f = (p+r)>0.0 ? 2*p*r/(p+r) : 0.0;
+    printf("Bracketing FMeasure       = %6.2f\n",f);
+
+    printf("Complete match            = %6.2f\n",
+           (sentn>0?100.0*TOTAL_comp_sent/sentn:0.0));
+    printf("Average crossing          = %6.2f\n",
+           (sentn>0?1.0*TOTAL_crossing/sentn:0.0));
+    printf("No crossing               = %6.2f\n",
+           (sentn>0?100.0*TOTAL_no_crossing/sentn:0.0));
+    printf("2 or less crossing        = %6.2f\n",
+           (sentn>0?100.0*TOTAL_2L_crossing/sentn:0.0));
+    printf("Tagging accuracy          = %6.2f\n",
+           (TOTAL_word>0?100.0*TOTAL_correct_tag/TOTAL_word:0.0));
+
+    sentn = TOT40_sent - TOT40_error_sent - TOT40_skip_sent;
+
+    printf("\n-- len<=%d --\n",TOT_cut_len);
+    printf("Number of sentence        = %6d\n",TOT40_sent);
+    printf("Number of Error sentence  = %6d\n",TOT40_error_sent);
+    printf("Number of Skip  sentence  = %6d\n",TOT40_skip_sent);
+    printf("Number of Valid sentence  = %6d\n",sentn);
+
+
+    r = TOT40_bn1>0 ? 100.0*TOT40_match/TOT40_bn1 : 0.0;
+    printf("Bracketing Recall         = %6.2f\n",r);
+
+    p = TOT40_bn2>0 ? 100.0*TOT40_match/TOT40_bn2 : 0.0;
+    printf("Bracketing Precision      = %6.2f\n",p);
+
+    /* same guard as in the "All" section */
+    f = (p+r)>0.0 ? 2*p*r/(p+r) : 0.0;
+    printf("Bracketing FMeasure       = %6.2f\n",f);
+
+    printf("Complete match            = %6.2f\n",
+           (sentn>0?100.0*TOT40_comp_sent/sentn:0.0));
+    printf("Average crossing          = %6.2f\n",
+           (sentn>0?1.0*TOT40_crossing/sentn:0.0));
+    printf("No crossing               = %6.2f\n",
+           (sentn>0?100.0*TOT40_no_crossing/sentn:0.0));
+    printf("2 or less crossing        = %6.2f\n",
+           (sentn>0?100.0*TOT40_2L_crossing/sentn:0.0));
+    printf("Tagging accuracy          = %6.2f\n",
+           (TOT40_word>0?100.0*TOT40_correct_tag/TOT40_word:0.0));
+
+}
+
+
+/*--------------------------------*/
+/* display individual information */
+/*--------------------------------*/
+/*
+ * Debug dump of the current sentence pair on stdout, in two columns:
+ * column <1> shows terminal1[] / bracket1[] and column <2> shows
+ * terminal2[] / bracket2[].  First the terminals (index, result, label,
+ * word), then the brackets (index, result, start, end, label).
+ *
+ * Both loops run up to the larger of the two counts, so one side may be
+ * indexed past its own count.
+ * NOTE(review): this presumably relies on unused slots having an empty
+ * word (terminals) or start == -1 (brackets) -- confirm against the
+ * code that initializes these arrays.
+ */
+void
+dsp_info()
+{
+  int i, n;
+
+  printf("-<1>---(wn1=%3d, bn1=%3d)-           ",wn1,bn1);
+  printf("-<2>---(wn2=%3d, bn2=%3d)-\n",wn2,bn2);
+
+  /* terminals: iterate over the longer of the two word lists */
+  n = (wn1>wn2?wn1:wn2);
+
+  for(i=0;i<n;i++){
+      if(terminal1[i].word[0]!='\0'){
+	  printf("%3d : %d : %-6s  %-16s      ",i,terminal1[i].result,
+		 terminal1[i].label,terminal1[i].word);
+      }else{
+	  /* blank padding keeps column <2> aligned */
+	  printf("                                        ");
+      }
+
+      if(terminal2[i].word[0]!='\0'){
+	  printf("%3d : %d : %-6s  %-16s\n",i,terminal2[i].result,
+		 terminal2[i].label,terminal2[i].word);
+      }else{
+	  printf("\n");
+      }
+  }
+  printf("\n");
+
+  /* brackets: iterate over the longer of the two bracket lists */
+  n = (bn1>bn2?bn1:bn2);
+
+  for(i=0;i<n;i++){
+      if(bracket1[i].start != -1){
+	  printf("%3d : %d : %3d  %3d  %-6s      ",i,bracket1[i].result,
+	                            bracket1[i].start,bracket1[i].end,
+	                            bracket1[i].label);
+      } else {
+	  printf("                                ");
+      }
+
+      if(bracket2[i].start != -1){
+	  printf("%3d : %d : %3d  %3d  %-6s\n",i,bracket2[i].result,
+                                    bracket2[i].start,bracket2[i].end,
+	                            bracket2[i].label);
+      } else {
+	  printf("\n");
+      }
+  }
+  printf("\n");
+
+  printf("========\n");
+
+}
+
+
+/*-----------------*/
+/* some predicates */
+/*-----------------*/
+/*
+ * Return 1 if `c` ends a token, i.e. it is whitespace or one of the
+ * parentheses '(' / ')'; return 0 otherwise.
+ *
+ * The character is converted to unsigned char before it is handed to
+ * isspace(): the <ctype.h> classifiers are only defined for EOF and
+ * values representable as unsigned char, so passing a plain (possibly
+ * negative) char holding a non-ASCII byte is undefined behavior.
+ */
+int
+is_terminator(c)
+char c;
+{
+    if(isspace((unsigned char)c) || c=='(' || c==')'){
+        return(1);
+    }
+    return(0);
+}
+
+/*
+ * Return 1 if `s` is exactly equal to one of the first Delete_label_n
+ * entries of Delete_label, 0 otherwise.
+ */
+int
+is_deletelabel(s)
+char *s;
+{
+    int idx = 0;
+
+    /* stop at the first exact match, or after the last entry */
+    while(idx<Delete_label_n && strcmp(s,Delete_label[idx])!=0){
+        idx++;
+    }
+
+    return(idx<Delete_label_n ? 1 : 0);
+}
+
+/*
+ * Return 1 if `s` is exactly equal to one of the first
+ * Delete_label_for_length_n entries of Delete_label_for_length,
+ * 0 otherwise.
+ */
+int
+is_deletelabel_for_length(s)
+char *s;
+{
+    int k;
+    int found = 0;
+
+    /* the scan ends as soon as a match has been seen */
+    for(k=0; !found && k<Delete_label_for_length_n; k++){
+        found = (strcmp(s,Delete_label_for_length[k])==0);
+    }
+
+    return(found);
+}
+
+/*
+ * Return 1 if label `s` is one of the first Quote_term_n entries of
+ * Quote_term AND word `w` is exactly "'", "\"" or "/"; return 0
+ * otherwise.  `w` is only inspected once the label has matched.
+ */
+int
+is_quote_term(s,w)
+char *s;
+char *w;
+{
+    int q;
+
+    for(q=0;q<Quote_term_n;q++){
+        /* not this quote label: try the next one */
+        if(strcmp(s,Quote_term[q])!=0){
+            continue;
+        }
+        if(strcmp(w,"'")==0){
+            return(1);
+        }
+        if(strcmp(w,"\"")==0){
+            return(1);
+        }
+        if(strcmp(w,"/")==0){
+            return(1);
+        }
+    }
+
+    return(0);
+}
+
+
+/*---------------*/
+/* compare words */
+/*---------------*/
+/*
+ * Return 1 if words `s1` and `s2` are identical, or if they form one of
+ * the first EQ_word_n pairs of EQ_word in either order; 0 otherwise.
+ */
+int
+word_comp(s1,s2)
+char *s1,*s2;
+{
+    int n;
+
+    if(strcmp(s1,s2)==0){
+        return(1);
+    }
+
+    for(n=0;n<EQ_word_n;n++){
+        /* pair as listed: (s1,s2) */
+        if(strcmp(s1,EQ_word[n].s1)==0 && strcmp(s2,EQ_word[n].s2)==0){
+            return(1);
+        }
+        /* pair reversed: (s2,s1) */
+        if(strcmp(s1,EQ_word[n].s2)==0 && strcmp(s2,EQ_word[n].s1)==0){
+            return(1);
+        }
+    }
+
+    return(0);
+}
+
+/*----------------*/
+/* compare labels */
+/*----------------*/
+/*
+ * Return 1 if labels `s1` and `s2` are identical, or if they form one
+ * of the first EQ_label_n pairs of EQ_label in either order; 0 otherwise.
+ */
+int
+label_comp(s1,s2)
+char *s1,*s2;
+{
+    int n = 0;
+
+    if(strcmp(s1,s2)==0){
+        return(1);
+    }
+
+    while(n<EQ_label_n){
+        /* the equivalence is non-directional: test both orderings */
+        if(strcmp(s1,EQ_label[n].s1)==0 && strcmp(s2,EQ_label[n].s2)==0){
+            return(1);
+        }
+        if(strcmp(s1,EQ_label[n].s2)==0 && strcmp(s2,EQ_label[n].s1)==0){
+            return(1);
+        }
+        n++;
+    }
+
+    return(0);
+}
+
+
+/*--------*/
+/* errors */
+/*--------*/
+/*
+ * Report a non-fatal error for the current sentence.
+ *
+ * Sets Status to 1, writes "<Line> : " followed by the message to
+ * stderr, and counts the error.  `s` is used as the fprintf format
+ * string with arg1..arg3 as its arguments, so it must not contain more
+ * conversions than the caller actually supplied.
+ *
+ * The process exits with status 1 when Error_count was already greater
+ * than Max_error BEFORE this call (the test uses the pre-increment
+ * value).  NOTE(review): assuming Error_count starts at 0, the exit
+ * therefore happens on error number Max_error+2 -- confirm this is the
+ * intended threshold.
+ */
+void
+Error(s,arg1,arg2,arg3)
+char *s, *arg1, *arg2, *arg3;
+{
+    Status = 1;
+    fprintf(stderr,"%d : ",Line);
+    fprintf(stderr,s,arg1,arg2,arg3);
+    if(Error_count++>Max_error){
+	exit(1);
+    }
+}
+
+
+/*---------------------*/
+/* fatal error to exit */
+/*---------------------*/
+/*
+ * Write a message to stderr and terminate the process with exit
+ * status 1.  `s` is used as the fprintf format string with arg1..arg3
+ * as its arguments.  This function does not return.
+ */
+void
+Fatal(s,arg1,arg2,arg3)
+char *s, *arg1, *arg2, *arg3;
+{
+    fprintf(stderr,s,arg1,arg2,arg3);
+    exit(1);
+}
+
+
+/*-------*/
+/* Usage */
+/*-------*/
+/* Write the command-line synopsis and the option summary to stderr. */
+void
+Usage()
+{
+    /* fixed help text, emitted line by line exactly as listed */
+    static char *help_text[] = {
+        " evalb [-dDh][-c n][-e n][-p param_file] gold-file test-file  \n",
+        "                                                         \n",
+        "    Evaluate bracketing in test-file against gold-file.  \n",
+        "    Return recall, precision, F-Measure, tag accuracy.              \n",
+        "                                                         \n",
+        "  <option>                                               \n",
+        "    -d             debug mode                            \n",
+        "    -D             debug mode plus bracketing info       \n",
+        "    -c n           cut-off length forstatistics (def.=40)\n",
+        "    -e n           number of error to kill (default=10)  \n",
+        "    -p param_file  parameter file                        \n",
+        "    -h    help                                           \n"
+    };
+    int count = (int)(sizeof(help_text)/sizeof(help_text[0]));
+    int line;
+
+    for(line=0;line<count;line++){
+        fputs(help_text[line],stderr);
+    }
+}
diff --git a/parsing/EVALB/new.prm b/parsing/EVALB/new.prm
new file mode 100644
index 0000000000000000000000000000000000000000..84ef7ea44eaff7d6f12ce92fa30c7220cdc04d03
--- /dev/null
+++ b/parsing/EVALB/new.prm
@@ -0,0 +1,87 @@
+##------------------------------------------##
+## Debug mode                               ##
+##   0: No debugging                        ##
+##   1: print data for individual sentence  ##
+##   2: print detailed bracketing info      ##
+##------------------------------------------##
+DEBUG 0
+
+##------------------------------------------##
+## MAX error                                ##
+##    Number of error to stop the process.  ##
+##    This is useful if there could be      ##
+##    tokenization error.                   ##
+##    The process will stop when this number##
+##    of errors are accumulated.            ##
+##------------------------------------------##
+MAX_ERROR 10
+
+##------------------------------------------##
+## Cut-off length for statistics            ##
+##    At the end of evaluation, the         ##
+##    statistics for the sentences of length##
+##    less than or equal to this number will##
+##    be shown, on top of the statistics    ##
+##    for all the sentences                 ##
+##------------------------------------------##
+CUTOFF_LEN 40
+
+##------------------------------------------##
+## unlabeled or labeled bracketing          ##
+##    0: unlabeled bracketing               ##
+##    1: labeled bracketing                 ##
+##------------------------------------------##
+LABELED 1                 
+
+##------------------------------------------##
+## Delete labels                            ##
+##    list of labels to be ignored.         ##
+##    If it is a pre-terminal label, delete ##
+##    the word along with the brackets.     ##
+##    If it is a non-terminal label, just   ##
+##    delete the brackets (don't delete     ##
+##    children).                            ##
+##------------------------------------------##
+DELETE_LABEL TOP
+DELETE_LABEL S1
+DELETE_LABEL -NONE-
+DELETE_LABEL ,
+DELETE_LABEL :
+DELETE_LABEL ``
+DELETE_LABEL ''
+DELETE_LABEL .
+DELETE_LABEL ?
+DELETE_LABEL !
+
+##------------------------------------------##
+## Delete labels for length calculation     ##
+##    list of labels to be ignored for      ##
+##    length calculation purpose            ##
+##------------------------------------------##
+DELETE_LABEL_FOR_LENGTH -NONE-
+
+##------------------------------------------##
+## Labels to be considered for misquote     ##
+##    (could be possessive or quote)        ##
+##------------------------------------------##
+QUOTE_LABEL ``
+QUOTE_LABEL ''
+QUOTE_LABEL POS
+
+##------------------------------------------##
+## These ones are less common, but          ##
+##    are on occasion output by parsers:    ##      
+##------------------------------------------##
+QUOTE_LABEL NN
+QUOTE_LABEL CD
+QUOTE_LABEL VBZ
+QUOTE_LABEL :
+
+##------------------------------------------##
+## Equivalent labels, words                 ##
+##     the pairs are considered equivalent  ##
+##     This is non-directional.             ##
+##------------------------------------------##
+EQ_LABEL ADVP PRT
+
+# EQ_WORD  Example example
diff --git a/parsing/EVALB/nk.prm b/parsing/EVALB/nk.prm
new file mode 100644
index 0000000000000000000000000000000000000000..78669757e02a465f5936441838a7701c7404059a
--- /dev/null
+++ b/parsing/EVALB/nk.prm
@@ -0,0 +1,92 @@
+# Based on new.prm (and by extension COLLINS.prm)
+# The only change from new.prm is increasing MAX_ERROR. The evaluation should be
+# identical to the standard setup, except that evalb won't give up early for a
+# parser that has just started training and does not yet produce good results.
+
+##------------------------------------------##
+## Debug mode                               ##
+##   0: No debugging                        ##
+##   1: print data for individual sentence  ##
+##   2: print detailed bracketing info      ##
+##------------------------------------------##
+DEBUG 0
+
+##------------------------------------------##
+## MAX error                                ##
+##    Number of error to stop the process.  ##
+##    This is useful if there could be      ##
+##    tokenization error.                   ##
+##    The process will stop when this number##
+##    of errors are accumulated.            ##
+##------------------------------------------##
+MAX_ERROR 10000
+
+##------------------------------------------##
+## Cut-off length for statistics            ##
+##    At the end of evaluation, the         ##
+##    statistics for the sentences of length##
+##    less than or equal to this number will##
+##    be shown, on top of the statistics    ##
+##    for all the sentences                 ##
+##------------------------------------------##
+CUTOFF_LEN 40
+
+##------------------------------------------##
+## unlabeled or labeled bracketing          ##
+##    0: unlabeled bracketing               ##
+##    1: labeled bracketing                 ##
+##------------------------------------------##
+LABELED 1
+
+##------------------------------------------##
+## Delete labels                            ##
+##    list of labels to be ignored.         ##
+##    If it is a pre-terminal label, delete ##
+##    the word along with the brackets.     ##
+##    If it is a non-terminal label, just   ##
+##    delete the brackets (don't delete     ##
+##    children).                            ##
+##------------------------------------------##
+DELETE_LABEL TOP
+DELETE_LABEL S1
+DELETE_LABEL -NONE-
+DELETE_LABEL ,
+DELETE_LABEL :
+DELETE_LABEL ``
+DELETE_LABEL ''
+DELETE_LABEL .
+DELETE_LABEL ?
+DELETE_LABEL !
+
+##------------------------------------------##
+## Delete labels for length calculation     ##
+##    list of labels to be ignored for      ##
+##    length calculation purpose            ##
+##------------------------------------------##
+DELETE_LABEL_FOR_LENGTH -NONE-
+
+##------------------------------------------##
+## Labels to be considered for misquote     ##
+##    (could be possessive or quote)        ##
+##------------------------------------------##
+QUOTE_LABEL ``
+QUOTE_LABEL ''
+QUOTE_LABEL POS
+
+##------------------------------------------##
+## These ones are less common, but          ##
+##    are on occasion output by parsers:    ##
+##------------------------------------------##
+QUOTE_LABEL NN
+QUOTE_LABEL CD
+QUOTE_LABEL VBZ
+QUOTE_LABEL :
+
+##------------------------------------------##
+## Equivalent labels, words                 ##
+##     the pairs are considered equivalent  ##
+##     This is non-directional.             ##
+##------------------------------------------##
+EQ_LABEL ADVP PRT
+
+# EQ_WORD  Example example
diff --git a/parsing/EVALB/sample/sample.gld b/parsing/EVALB/sample/sample.gld
new file mode 100644
index 0000000000000000000000000000000000000000..c94b8be3e8edd8a1d168d2f2117292a55f466b9d
--- /dev/null
+++ b/parsing/EVALB/sample/sample.gld
@@ -0,0 +1,24 @@
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A-SBJ-1 (P this)) (B-WHATEVER (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))) (-NONE- *))
+(S (A (P this)) (B (Q is) (A (R a) (T test))) (: *))
diff --git a/parsing/EVALB/sample/sample.prm b/parsing/EVALB/sample/sample.prm
new file mode 100644
index 0000000000000000000000000000000000000000..57d7c6e19cda7bedf4beeacd5e5e6fb1d1036023
--- /dev/null
+++ b/parsing/EVALB/sample/sample.prm
@@ -0,0 +1,65 @@
+##------------------------------------------##
+## Debug mode                               ##
+##   print out data for individual sentence ##
+##------------------------------------------##
+DEBUG 0
+
+##------------------------------------------##
+## MAX error                                ##
+##    Number of error to stop the process.  ##
+##    This is useful if there could be      ##
+##    tokenization error.                   ##
+##    The process will stop when this number##
+##    of errors are accumulated.            ##
+##------------------------------------------##
+MAX_ERROR 10
+
+##------------------------------------------##
+## Cut-off length for statistics            ##
+##    At the end of evaluation, the         ##
+##    statistics for the sentences of length##
+##    less than or equal to this number will##
+##    be shown, on top of the statistics    ##
+##    for all the sentences                 ##
+##------------------------------------------##
+CUTOFF_LEN 40
+
+##------------------------------------------##
+## unlabeled or labeled bracketing          ##
+##    0: unlabeled bracketing               ##
+##    1: labeled bracketing                 ##
+##------------------------------------------##
+LABELED 1                 
+
+##------------------------------------------##
+## Delete labels                            ##
+##    list of labels to be ignored.         ##
+##    If it is a pre-terminal label, delete ##
+##    the word along with the brackets.     ##
+##    If it is a non-terminal label, just   ##
+##    delete the brackets (don't delete     ##
+##    children).                            ##
+##------------------------------------------##
+DELETE_LABEL TOP
+DELETE_LABEL -NONE-
+DELETE_LABEL ,
+DELETE_LABEL :
+DELETE_LABEL ``
+DELETE_LABEL ''
+
+##------------------------------------------##
+## Delete labels for length calculation     ##
+##    list of labels to be ignored for      ##
+##    length calculation purpose            ##
+##------------------------------------------##
+DELETE_LABEL_FOR_LENGTH -NONE-
+
+
+##------------------------------------------##
+## Equivalent labels, words                 ##
+##     the pairs are considered equivalent  ##
+##     This is non-directional.             ##
+##------------------------------------------##
+EQ_LABEL T TT
+
+EQ_WORD  This this
diff --git a/parsing/EVALB/sample/sample.rsl b/parsing/EVALB/sample/sample.rsl
new file mode 100644
index 0000000000000000000000000000000000000000..0ad36181268d4ff3bdb1727a5c20a8fa28a0e2c2
--- /dev/null
+++ b/parsing/EVALB/sample/sample.rsl
@@ -0,0 +1,56 @@
+  Sent.                        Matched  Bracket   Cross        Correct Tag
+ ID  Len.  Stat. Recal  Prec.  Bracket gold test Bracket Words  Tags Accracy
+============================================================================
+   1    4    0  100.00 100.00     4      4    4      0      4     4   100.00
+   2    4    0   75.00  75.00     3      4    4      0      4     4   100.00
+   3    4    0  100.00 100.00     4      4    4      0      4     3    75.00
+   4    4    0   75.00  75.00     3      4    4      0      4     3    75.00
+   5    4    0   75.00  75.00     3      4    4      0      4     4   100.00
+   6    4    0   50.00  66.67     2      4    3      1      4     4   100.00
+   7    4    0   25.00 100.00     1      4    1      0      4     4   100.00
+   8    4    0    0.00   0.00     0      4    0      0      4     4   100.00
+   9    4    0  100.00  80.00     4      4    5      0      4     4   100.00
+  10    4    0  100.00  50.00     4      4    8      0      4     4   100.00
+  11    4    2    0.00   0.00     0      0    0      0      4     0     0.00
+  12    4    1    0.00   0.00     0      0    0      0      4     0     0.00
+  13    4    1    0.00   0.00     0      0    0      0      4     0     0.00
+  14    4    2    0.00   0.00     0      0    0      0      4     0     0.00
+  15    4    0  100.00 100.00     4      4    4      0      4     4   100.00
+  16    4    1    0.00   0.00     0      0    0      0      4     0     0.00
+  17    4    1    0.00   0.00     0      0    0      0      4     0     0.00
+  18    4    0  100.00 100.00     4      4    4      0      4     4   100.00
+  19    4    0  100.00 100.00     4      4    4      0      4     4   100.00
+  20    4    1    0.00   0.00     0      0    0      0      4     0     0.00
+  21    4    0  100.00 100.00     4      4    4      0      4     4   100.00
+  22   44    0  100.00 100.00    34     34   34      0     44    44   100.00
+  23    4    0  100.00 100.00     4      4    4      0      4     4   100.00
+  24    5    0  100.00 100.00     4      4    4      0      4     4   100.00
+============================================================================
+                 87.76  90.53     86     98   95     16    108   106    98.15
+=== Summary ===
+
+-- All --
+Number of sentence        =     24
+Number of Error sentence  =      5
+Number of Skip  sentence  =      2
+Number of Valid sentence  =     17
+Bracketing Recall         =  87.76
+Bracketing Precision      =  90.53
+Complete match            =  52.94
+Average crossing          =   0.06
+No crossing               =  94.12
+2 or less crossing        = 100.00
+Tagging accuracy          =  98.15
+
+-- len<=40 --
+Number of sentence        =     23
+Number of Error sentence  =      5
+Number of Skip  sentence  =      2
+Number of Valid sentence  =     16
+Bracketing Recall         =  81.25
+Bracketing Precision      =  85.25
+Complete match            =  50.00
+Average crossing          =   0.06
+No crossing               =  93.75
+2 or less crossing        = 100.00
+Tagging accuracy          =  96.88
diff --git a/parsing/EVALB/sample/sample.tst b/parsing/EVALB/sample/sample.tst
new file mode 100644
index 0000000000000000000000000000000000000000..c840e7115312b06b485277ac4481812c37691f89
--- /dev/null
+++ b/parsing/EVALB/sample/sample.tst
@@ -0,0 +1,24 @@
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (C (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (U test))))
+(S (C (P this)) (B (Q is) (A (R a) (U test))))
+(S (A (P this)) (B (Q is) (R a) (A (T test))))
+(S (A (P this) (Q is)) (A (R a) (T test)))
+(S (P this) (Q is) (R a) (T test))
+(P this) (Q is) (R a) (T test)
+(S (A (P this)) (B (Q is) (A (A (R a) (T test)))))
+(S (A (P this)) (B (Q is) (A (A (A (A (A (R a) (T test))))))))
+
+(S (A (P this)) (B (Q was) (A (A (R a) (T test)))))
+(S (A (P this)) (B (Q is) (U not) (A (A (R a) (T test)))))
+
+(TOP (S (A (P this)) (B (Q is) (A (R a) (T test)))))
+(S (A (P this)) (NONE *) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (S (NONE abc) (A (NONE *))) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (TT test))))
+(S (A (P This)) (B (Q is) (A (R a) (T test))))
+(S (A (P That)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))))
+(S (A (P this)) (B (Q is) (A (R a) (T test))) (-NONE- *))
+(S (A (P this)) (B (Q is) (A (R a) (T test))) (: *))
diff --git a/parsing/EVALB/tgrep_proc.prl b/parsing/EVALB/tgrep_proc.prl
new file mode 100644
index 0000000000000000000000000000000000000000..61a7cabffc2b557ce6173566a61b236aa53aa721
--- /dev/null
+++ b/parsing/EVALB/tgrep_proc.prl
@@ -0,0 +1,9 @@
+#!/usr/local/bin/perl
+
+# Filter: copy to stdout only the input lines that contain "TOP";
+# every other line (blank lines included) is dropped.
+while (<>) {
+    print if /TOP/;
+}
diff --git a/parsing/EVALB_SPMRL/Makefile b/parsing/EVALB_SPMRL/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..2498f02b7368c9bef670af5e912313aba97462a2
--- /dev/null
+++ b/parsing/EVALB_SPMRL/Makefile
@@ -0,0 +1,65 @@
+TESTFILE=dev.Arabic.gold.ptb.-feat
+JAVA=java
+
+
+all: evalb
+
+
+clean:
+	rm -f evalb_spmrl
+	
+install: evalb
+	cp evalb_spmrl /usr/local/bin
+
+
+
+evalb: evalb.c
+	gcc -Wall -O3 -g -o evalb_spmrl evalb.c
+	
+evalb_linux: evalb.c
+	gcc -Wall -fPIC -O3 -g -o evalb_spmrl evalb.c
+# (evalb_linux above: same build with -fPIC added, for compiling on Linux)
+
+          
+
+# note: in the original Makefile, the -funsigned-char option was applied
+
+home: install_home
+
+install_home: all
+	cp evalb_spmrl ${PREFIX}/bin
+	
+up:
+	tar zcvf ../evalb_spmrl2013.tar.gz ../evalb_spmrl2013/
+	putW ../evalb_spmrl2013.tar.gz
+
+
+
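+#################################
+# stuff to debug some treebanks #
+#################################
+# NOTE(review): the targets below run ./evalb, but the build rules in
+# this Makefile produce a binary named evalb_spmrl -- confirm that an
+# ./evalb binary or symlink exists before relying on them.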
+test_full: all
+	./evalb dev.Arabic.gold.ptb dev.Arabic.gold.ptb
+
+
+test: all
+	./evalb -p ./new.prm ${TESTFILE} ${TESTFILE}
+
+debug: all
+	./evalb -D ${TESTFILE} ${TESTFILE}
+	echo "./evalb -D ${TESTFILE} ${TESTFILE}"
+
+debug_one: all
+	lines 616 < ${TESTFILE} > ${TESTFILE}.616
+	./evalb -D ${TESTFILE}.616 ${TESTFILE}.616
+	echo "./evalb -D ${TESTFILE}.616 ${TESTFILE}.616"	
+
+releaf:
+	./evalb -D dev.Arabic.gold.ptb.-feat.616.bug dev.Arabic.gold.ptb.-feat.616.bug
+	echo "./evalb -D dev.Arabic.gold.ptb.-feat.616.bug dev.Arabic.gold.ptb.-feat.616.bug" > /dev/stderr
+
+java: 
+	${JAVA} -jar ./evalC/evalC.jar ${TESTFILE} ${TESTFILE} /dev/stdout
+		
+	
+		
\ No newline at end of file
diff --git a/parsing/EVALB_SPMRL/README b/parsing/EVALB_SPMRL/README
new file mode 100644
index 0000000000000000000000000000000000000000..e3c381ea7a8ba73431142efa73997bbda887755b
--- /dev/null
+++ b/parsing/EVALB_SPMRL/README
@@ -0,0 +1,76 @@
+
+// Djamé: version record added for history's sake.
+// note to future updater: please add your changelog below
+
+(copied from http://nlp.cs.nyu.edu/evalb/ )
+EVALB20080701.tgz (July 1, 2008 version) modified by Don Blaheta (Knox College)
+EVALB20060307.tgz (March 3, 2006 version; debugged version of the Jan. 17, 2006 version) modified by David Ellis (Brown University)
+EVALB20060117.tgz (Jan. 17, 2006 version) modified by David Ellis (Brown University)
+EVALB20050908.tgz (Sept. 8, 2005 version) modified by David Brooks (Birmingham)
+EVALB.tgz (original version).
+Authors
+
+Satoshi Sekine (New York University) :  e-mail: his last name (at) cs.nyu.edu
+Michael John Collins (University of Pennsylvania)
+Note: the authors are not responsible for the newer versions. We put these versions even without checking the program. Please be responsible for yourself.
+
+*************************************************************************
+
+Modification
+
+David Brooks (Birmingham): fixed the code so that the program can be compiled by the latest gcc (September 2005). Help was given by Peet Morris and Ramon Ziai through the Corpora Mailing list.
+David Ellis (Brown University): fixed a bug in which sentences were incorrectly categorized as "length mismatch" when the parse output had certain mislabeled parts-of-speech.
+Don Blaheta (KNOX): fixed a bug in which the last number of the total information line was TOTAL_non_crossing instead of TOTAL_crossing.
+
+
+
+April 2012
+// Modified by Slav Petrov and Ryan McDonald (Google Inc., for the SANCL 2012 shared task)
+// ===>  making it less sensitive to punctuation POS errors that lead to
+// a length mismatch
+
+
+August 2013, 10
+// Modified by Djamé Seddah (Univ. Paris Sorbonne, for the SPMRL 2013 shared task)
+// ===> making it able to cope with very long Arabic lines (byte-wise)
+// ===> the limit is now 50000 bytes, was 5000 (tricky bug, if you ask me)
+// please check the constant macro section if you encounter weird bugs not present in other
+implementations (check evalC by Federico Sangati for example, http://homepages.inf.ed.ac.uk/fsangati/evalC_25_5_10.zip )
+
+
+August 2013, 23
+// Modif from Thomas Müller (IMS Stuttgart)
+// ===> added '#' as a stop character in the modify_label function (so that the
+// lexer will read NPP instead of NPP##feat:...### as in the SPMRL data set)
+// Modif from Djamé Seddah
+// ===>  Application of modify_label to all labels (including the POS labels,
+// which were left untouched for some reason)
+// That should, by the way, be an option (whether to evaluate full labels or not,
+// only stripping non-terminals, POS tags and so on)
+ 
+
+August 2013, 27
+// Modif from Djamé
+// --> adding of an option to include the non parsed sentences in the
+// --> evaluation (-X option)
+// --> adding an option to evaluate only the first N sentences (-K n)
+// --> adding an option to provide a compact results view (-L) so one can do
+// --> find ./ -name "*parsed.run?" -exec evalb_spmrl -L GOLD {} \; -print |
+// --> grep -v '=====' | grep '='
+
+September 2013, 6
+// Modif from DJame
+// fixing the infinite slowness bug (shame on me)
+// now speed is similar to what it was before
+
+
+October 2013, 13
+// Addition from Djame
+// Adding the spmrl_hebrew.prm if one wants to evaluate hebrew parsing within the
+// same conditions as the state-of-the-art
+// namely without counting the additional SYNpos layer which inflates evalb
+// scores by almost 2 points.
+// Note: for the spmrl shared task, we used the spmrl.prm file (so with
+// these labels. It was too late to modify the rules once again when we
+// realized this)
+
diff --git a/parsing/EVALB_SPMRL/README.orig b/parsing/EVALB_SPMRL/README.orig
new file mode 100644
index 0000000000000000000000000000000000000000..73c422f7ea568403284396d412f74b0bb083ec81
--- /dev/null
+++ b/parsing/EVALB_SPMRL/README.orig
@@ -0,0 +1,230 @@
+#################################################################
+#                                                               #
+#      README file for evalb                                    #
+#                                                               #
+#                                         Satoshi Sekine (NYU)  #
+#                                         Mike Collins (UPenn)  #
+#                                                               #
+#                                         October.1997          #
+#################################################################
+
+Contents of this README:
+
+   [0] COPYRIGHT
+   [1] INTRODUCTION
+   [2] INSTALLATION AND RUN
+   [3] OPTIONS
+   [4] OUTPUT FORMAT FROM THE SCORER
+   [5] HOW TO CREATE A GOLDFILE FROM THE TREEBANK
+   [6] THE PARAMETER FILE
+   [7] MORE DETAILS ABOUT THE SCORING ALGORITHM
+
+
+[0] COPYRIGHT
+
+The authors abandon the copyright of this program. Everyone is 
+permitted to copy and distribute the program or a portion of the program
+with no charge and no restrictions unless it is harmful to someone.
+
+However, the authors would be delighted if users make proper use of the
+program and let the authors know about any bugs or problems.
+
+This software is provided "AS IS", and the authors make no warranties,
+express or implied.
+
+
+[1] INTRODUCTION
+
+Evaluation of bracketing looks simple, but in fact, there are minor
+differences from system to system. This is a program to parameterize
+such minor differences and to give an informative result.
+
+"evalb" evaluates bracketing accuracy in a test-file against a gold-file.
+It returns recall, precision, tagging accuracy. It uses an identical 
+algorithm to that used in (Collins ACL97).
+
+
+[2] Installation and Run
+
+To compile the scorer, type 
+
+> make
+
+
+To run the scorer:
+
+> evalb -p Parameter_file Gold_file Test_file
+
+ 
+For example to use the sample files:
+
+> evalb -p sample.prm sample.gld sample.tst
+
+
+
+[3] OPTIONS
+
+You can specify system parameters in the command line options.
+Other options concerning the evaluation metrics should be specified
+in the parameter file, described later.
+
+        -p param_file  parameter file                        
+        -d             debug mode                            
+        -e n           number of error to kill (default=10)  
+        -h             help                                  
+
+
+
+[4] OUTPUT FORMAT FROM THE SCORER
+
+The scorer gives individual scores for each sentence, for
+example:
+
+  Sent.                        Matched  Bracket   Cross        Correct Tag
+ ID  Len.  Stat. Recal  Prec.  Bracket gold test Bracket Words  Tags Accracy
+============================================================================
+   1    8    0  100.00 100.00     5      5    5      0      6     5    83.33
+
+At the end of the output the === Summary === section gives statistics 
+for all sentences, and for sentences <=40 words in length. The summary
+contains the following information:
+
+i)   Number of sentences -- total number of sentences.
+
+ii)  Number of Error/Skip sentences -- should both be 0 if there is no
+    problem with the parsed/gold files.
+
+iii) Number of valid sentences = Number of sentences - Number of Error/Skip
+    sentences 
+
+iv)  Bracketing recall =     (number of correct constituents)
+                         ----------------------------------------
+                         (number of constituents in the goldfile)
+
+v)   Bracketing precision = (number of correct constituents)
+                         ----------------------------------------
+                         (number of constituents in the parsed file)
+
+vi)  Complete match = percentage of sentences where recall and precision are
+    both 100%. 
+
+vii) Average crossing = (number of constituents crossing a goldfile constituent)
+                         ----------------------------------------------------
+                                        (number of sentences)
+
+viii) No crossing = percentage of sentences which have 0 crossing brackets.
+
+ix)   2 or less crossing = percentage of sentences which have <=2 crossing brackets.
+
+x)    Tagging accuracy = percentage of correct POS tags (but see [7].3 for exact
+     details of what is counted).
+
+
+
+[5] HOW TO CREATE A GOLDFILE FROM THE PENN TREEBANK
+
+
+The gold and parsed files are in a format similar to this:
+
+(TOP (S (INTJ (RB No)) (, ,) (NP (PRP it)) (VP (VBD was) (RB n't) (NP (NNP Black) (NNP Monday))) (. .)))
+
+To create a gold file from the treebank:
+
+tgrep -wn '/.*/' | tgrep_proc.prl 
+
+will produce a goldfile in the required format.  ("tgrep -wn '/.*/'" prints
+parse trees, "tgrep_process.prl" just skips blank lines).
+
+For example, to produce a goldfile for section 23 of the treebank:
+
+tgrep -wn '/.*/' | tail +90895 | tgrep_process.prl | sed 2416q > sec23.gold
+
+
+
+[6] THE PARAMETER (.prm) FILE
+
+
+The .prm file sets options regarding the scoring method. COLLINS.prm gives
+the same scoring behaviour as the scorer used in (Collins 97). The options 
+chosen were: 
+
+1) LABELED 1
+
+to give labelled precision/recall figures, i.e. a constituent must have the
+same span *and* label as a constituent in the goldfile.
+
+2) DELETE_LABEL TOP   
+
+Don't count the "TOP" label (which is always given in the output of tgrep) 
+when scoring. 
+
+3) DELETE_LABEL -NONE-  
+
+Remove traces (and all constituents which dominate nothing but traces) when
+scoring. For example
+
+.... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
+
+would be processed to give
+
+.... (VP (VBD reported)) (. .)))
+
+
+4)
+DELETE_LABEL ,     -- for the purposes of scoring remove punctuation
+DELETE_LABEL :
+DELETE_LABEL ``
+DELETE_LABEL ''
+DELETE_LABEL .
+
+5) DELETE_LABEL_FOR_LENGTH -NONE-   -- don't include traces when calculating
+                                       the length of a sentence (important
+                                       when classifying a sentence as <=40
+                                       words or >40 words)
+
+6) EQ_LABEL ADVP PRT
+
+Count ADVP and PRT as being the same label when scoring.
+
+
+
+
+[7] MORE DETAILS ABOUT THE SCORING ALGORITHM
+
+
+1) The scorer initially processes the files to remove all nodes specified
+by DELETE_LABEL in the .prm file. It also recursively removes nodes which
+dominate nothing due to all their children being removed. For example, if
+-NONE- is specified as a label to be deleted, 
+
+.... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
+
+would be processed to give
+
+.... (VP (VBD reported)) (. .)))
+
+2) The scorer also removes all functional tags attached to non-terminals
+(functional tags are prefixed with "-" or "=" in the treebank). For example
+"NP-SBJ" is processed to give "NP", "NP=2" is changed to "NP".
+
+
+3) Tagging accuracy counts tags for all words *except* any tags which are
+deleted by a DELETE_LABEL specification in the .prm file. (For example, for
+COLLINS.prm, punctuation tagged as "," ":" etc. would not be included).
+
+4) When calculating the length of a sentence, all words with POS tags not 
+included in the "DELETE_LABEL_FOR_LENGTH" list in the .prm file are
+counted. (For COLLINS.prm, only "-NONE-" is specified in this list, so
+traces are removed before calculating the length of the sentence).
+
+5) There are some subtleties in scoring when either the goldfile or parsed
+file contains multiple constituents for the same span which have the same
+non-terminal label. e.g. (NP (NP the man)) If the goldfile contains n 
+constituents for the same span, and the parsed file contains m constituents
+with that nonterminal, the scorer works as follows:
+
+i) If m>n, then the precision is n/m, recall is 100%
+
+ii) If n>m, then the precision is 100%, recall is m/n.
+
+iii) If n==m, recall and precision are both 100%.
diff --git a/parsing/EVALB_SPMRL/evalb.c b/parsing/EVALB_SPMRL/evalb.c
new file mode 100644
index 0000000000000000000000000000000000000000..16d6b453c9e59c4015805672877bab6f8456090e
--- /dev/null
+++ b/parsing/EVALB_SPMRL/evalb.c
@@ -0,0 +1,1724 @@
+/*****************************************************************/
+/* evalb [-p param_file] [-dh] [-e n] gold-file test-file        */
+/*                                                               */
+/*        Evaluate bracketing in test-file against gold-file.    */
+/*        Return recall, precision, tagging accuracy.            */
+/*                                                               */
+/*   <option>                                                    */
+/*        -p param_file  parameter file                          */
+/*        -d             debug mode                              */
+/*        -e n           number of error to kill (default=10)    */
+/*        -h             help                                    */
+/*                                                               */
+/*                                         Satoshi Sekine (NYU)  */
+/*                                         Mike Collins (UPenn)  */
+/*                                                               */
+/*                                         October.1997          */
+/*                                                               */
+/* Please refer README for the update information                */
+/*****************************************************************/
+
+// Djamé: version record added for history's sake.
+// note to future updater: please add your changelog below
+
+// Modified by Slav Petrov and Ryan McDonald (for the SANCL 2012 shared task)
+// ===>  making it less sensitive to punct POS errors leading to
+// ===> mismatch of length
+
+// Modified by Djamé Seddah (for the spmrl shared 2013 task)
+// ===> making it able to cope with Arabic very long lines (byte wise)
+// ===>  now limit is 50000 bytes, was 5000 (damn bug, if you ask me)
+// ===> modified to cope with spmrl 2013 morph features (suggested by Thomas Müller from IMS)
+// please check the constant macro section
+// Correction of bug causing hard slowdown (due to max_word_in_sent set too high)
+// former version was 78x slower than regular evalb.
+
+
+
+
+
+
+#include <stdio.h>
+#include <stdlib.h> //### added for exit, atoi decls
+#include <ctype.h>
+#include <string.h>
+#ifndef __APPLE__  # dj: added to compile on mac os x
+#include <malloc.h>
+#endif
+
+/* Internal Data format -------------------------------------------*/
+/*                                                                 */
+/* (S (NP (NNX this)) (VP (VBX is) (NP (DT a) (NNX pen))) (SYM .)) */
+/*                                                                 */
+/*   wn=5                                                          */
+/*                        word    label                            */
+/*   terminal[0] =        this     NNX                             */
+/*   terminal[1] =        is       VBX                             */
+/*   terminal[2] =        a        DT                              */
+/*   terminal[3] =        pen      NNX                             */
+/*   terminal[4] =        .        SYM                             */
+/*                                                                 */
+/*   bn=4                                                          */
+/*                      start     end      label                   */
+/*   bracket[0]  =        0        5         S                     */
+/*   bracket[1]  =        0        1         NP                    */
+/*   bracket[2]  =        1        4         VP                    */
+/*   bracket[3]  =        2        4         NP                    */
+/*                                                                 */
+/*              matched bracketing                                 */
+/*   Recall = ---------------------------                          */
+/*             # of bracket in ref-data                            */
+/*                                                                 */
+/*                 matched bracketing                              */
+/*   Precision = ---------------------------                       */
+/*                # of bracket in test-data                        */
+/*                                                                 */
+/*-----------------------------------------------------------------*/
+
+/******************/
+/* constant macro */
+/******************/
+
+/* Size in bytes of the line buffers main() uses to read one sentence. */
+#define MAX_SENT_LEN            50000   //Djamé : was not used
+/* Capacity of the terminal1/terminal2 arrays (words per sentence). */
+#define MAX_WORD_IN_SENT        1000
+/* Capacity of the bracket1/bracket2 arrays and of the bracket stack. */
+#define MAX_BRACKET_IN_SENT     2000
+/* Size of the buffer holding one word (terminating NUL included). */
+#define MAX_WORD_LEN            100
+/* Size of the buffer holding one label (terminating NUL included). */
+#define MAX_LABEL_LEN           300
+/* Capacity of the Quote_term list and of the quotterm1/quotterm2 arrays. */
+#define MAX_QUOTE_TERM           20
+
+/* Capacities of the tables filled from the parameter file. */
+#define MAX_DELETE_LABEL        1000
+#define MAX_EQ_LABEL            1000
+#define MAX_EQ_WORD             1000
+
+/* Size of the buffer used to read one line of the parameter file. */
+#define MAX_LINE_LEN            500
+
+/* Initial values of Max_error and TOT_cut_len (both can be overridden). */
+#define DEFAULT_MAX_ERROR        10
+#define DEFAULT_CUT_LEN          40
+
+/*************/
+/* structure */
+/*************/
+
+/* One terminal of a sentence: the word and its pre-terminal (POS) label, */
+/* as stored by read_line(), plus a per-word result code.                 */
+typedef struct ss_terminal {
+    char word[MAX_WORD_LEN];
+    char label[MAX_LABEL_LEN];
+    int  result;                /* 0:unmatch, 1:match, 9:undef */
+} s_terminal;
+
+/* A "quote" terminal recorded by read_line() together with the parser  */
+/* state at the moment it was read.                                      */
+typedef struct ss_term_ind {
+	s_terminal term;                /* word and label of the terminal */
+	int index;                      /* word id (wid) at which it was read; -1 after init() */
+	int bracket;                    /* bracket id (bid) at that moment; -1 after init() */
+	int endslen;                    /* number of valid entries in ends[] (stack depth then) */
+	int ends[MAX_BRACKET_IN_SENT];  /* copy of the open-bracket stack at that moment */
+} s_term_ind;
+
+/* One non-terminal bracket of a sentence as filled in by read_line(). */
+typedef struct ss_bracket {
+    int start;                   /* word id when the bracket was opened; -1 after init() */
+    int end;                     /* word id when the bracket was closed; -1 after init() */
+    unsigned int buf_start;      /* offset into the line buffer just after the label */
+    unsigned int buf_end;        /* offset into the line buffer of the closing ')' */
+    char label[MAX_LABEL_LEN];
+    int  result;                 /* 0: unmatch, 1:match, 5:delete 9:undef */
+} s_bracket;
+
+
+/* A pair of strings declared equivalent by an EQ_LABEL or EQ_WORD line */
+/* of the parameter file; both strings are heap-allocated by set_param(). */
+typedef struct ss_equiv {
+    char *s1;
+    char *s2;
+} s_equiv;
+
+
+/****************************/
+/* global variables         */
+/*   gold-data: suffix = 1  */
+/*   test-data: suffix = 2  */
+/****************************/
+
+/*---------------*/
+/* Sentence data */
+/*---------------*/
+int wn1, wn2;                              /* number of words in sentence  */
+int r_wn1;                                 /* number of words in sentence  */
+/* which only ignores labels in */
+/* DELETE_LABEL_FOR_LENGTH      */
+
+s_terminal terminal1[MAX_WORD_IN_SENT];    /* terminal information */
+s_terminal terminal2[MAX_WORD_IN_SENT];
+
+s_term_ind quotterm1[MAX_QUOTE_TERM];      /* special terminals ("'","POS") */
+s_term_ind quotterm2[MAX_QUOTE_TERM];
+
+int bn1, bn2;                              /* number of brackets */
+
+int r_bn1, r_bn2;                          /* number of brackets */
+/* after deletion */
+
+s_bracket bracket1[MAX_BRACKET_IN_SENT];   /* bracket information */
+s_bracket bracket2[MAX_BRACKET_IN_SENT];
+
+
+/*------------*/
+/* Total data */
+/*------------*/
+int TOTAL_bn1, TOTAL_bn2, TOTAL_match;     /* total number of brackets */
+int TOTAL_sent;                            /* No. of sentence */
+int TOTAL_error_sent;                      /* No. of error sentence */
+int TOTAL_skip_sent;                       /* No. of skip sentence */
+int TOTAL_comp_sent;                       /* No. of complete match sent */
+int TOTAL_word;                            /* total number of word */
+int TOTAL_crossing;                        /* total crossing */
+int TOTAL_no_crossing;                     /* no crossing sentence */
+int TOTAL_2L_crossing;                     /* 2 or less crossing sentence */
+int TOTAL_correct_tag;                     /* total correct tagging */
+
+int TOT_cut_len = DEFAULT_CUT_LEN;         /* Cut-off length in statistics */
+
+/* data for sentences with len <= CUT_LEN */
+/* Historically it was 40.                */
+int TOT40_bn1, TOT40_bn2, TOT40_match;     /* total number of brackets */
+int TOT40_sent;                            /* No. of sentence */
+int TOT40_error_sent;                      /* No. of error sentence */
+int TOT40_skip_sent;                       /* No. of skip sentence */
+int TOT40_comp_sent;                       /* No. of complete match sent */
+int TOT40_word;                            /* total number of word */
+int TOT40_crossing;                        /* total crossing */
+int TOT40_no_crossing;                     /* no crossing sentence */
+int TOT40_2L_crossing;                     /* 2 or less crossing sentence */
+int TOT40_correct_tag;                     /* total correct tagging */
+
+/*---------------*/
+/* miscellaneous */
+/*---------------*/
+int Line;                                  /* line number */
+int Error_count = 0;                       /* Error count */
+int Status;                                /* Result status for each sent */
+/*    0: OK, 1: skip, 2: error */
+
+/*--------------------*/
+/* stack manipulation */
+/*--------------------*/
+int stack_top;
+int stack[MAX_BRACKET_IN_SENT];
+
+/************************************************************/
+/* User parameters which can be specified in parameter file */
+/************************************************************/
+
+/*------------------------------------------*/
+/* Debug mode                               */
+/*   print out data for individual sentence */
+/*------------------------------------------*/
+int DEBUG=0;
+
+/*------------------------------------------*/
+/* MAX error                                */
+/*    Number of error to stop the process.  */
+/*    This is useful if there could be      */
+/*    tokenization error.                   */
+/*    The process will stop when this number*/
+/*    of errors are accumulated.            */
+/*------------------------------------------*/
+int Max_error = DEFAULT_MAX_ERROR;
+
+/*------------------------------------------*/
+/* Cut-off length for statistics            */
+/*    int TOT_cut_len = DEFAULT_CUT_LEN;    */
+/*    (Defined above)                       */
+/*------------------------------------------*/
+
+
+/*------------------------------------------*/
+/* unlabeled or labeled bracketing          */
+/*    0: unlabeled bracketing               */
+/*    1: labeled bracketing                 */
+/*------------------------------------------*/
+int F_label    = 1;                 
+
+/*------------------------------------------*/
+/* Delete labels                            */
+/*    list of labels to be ignored.         */
+/*    If it is a pre-terminal label, delete */
+/*    the word along with the brackets.     */
+/*    If it is a non-terminal label, just   */
+/*    delete the brackets (don't delete     */
+/*    children).                            */
+/*------------------------------------------*/
+char *Delete_label[MAX_DELETE_LABEL];
+int Delete_label_n = 0;
+
+/*------------------------------------------*/
+/* Delete labels for length calculation     */
+/*    list of labels to be ignored for      */
+/*    length calculation purpose            */
+/*------------------------------------------*/
+char *Delete_label_for_length[MAX_DELETE_LABEL];
+int Delete_label_for_length_n = 0;
+
+/*------------------------------------------*/
+/* Labels to be considered for misquote     */
+/*    (could be possessive or quote)        */
+/*------------------------------------------*/
+char *Quote_term[MAX_QUOTE_TERM];
+int Quote_term_n = 0;
+
+/*------------------------------------------*/
+/* Equivalent labels, words                 */
+/*     the pairs are considered equivalent  */
+/*     This is non-directional.             */
+/*------------------------------------------*/
+s_equiv EQ_label[MAX_EQ_LABEL];
+int EQ_label_n = 0;
+
+s_equiv EQ_word[MAX_EQ_WORD];
+int EQ_word_n = 0;
+
+
+// added by djame
+int spmrl_max_line_to_read=-1	;
+int spmrl_compact_view=0; // default : classic view
+int spmrl_compact_view40=0; // if one, prints <40 sentence in compact view
+int spmrl_count_bad_sent=0; // default no count
+int spmrl_print_filename=0; // default not to print name
+
+/************************/
+/* Function return-type */
+/************************/
+int main();
+void init_global();
+void print_head();
+void init();
+void read_parameter_file();
+void set_param();
+int narg();
+int read_line();
+
+void pushb();
+int popb();
+int stackempty();
+
+void calc_result(unsigned char *buf1,unsigned char *buf);
+void fix_quote();
+void reinsert_term();
+void massage_data();
+int massage_data_gold_only(); // djame: non destructive 
+void modify_label();
+void individual_result();
+void print_total();
+void dsp_info();
+int my_isspace(char c); // Djamé: added for debugging' sake
+
+int is_terminator();
+int is_deletelabel();
+int is_deletelabel_for_length();
+int is_quote_term();
+int word_comp();
+int label_comp();
+
+void Error();
+void Fatal();
+void Usage();
+
+/* ### provided by std headers 
+ int fprintf();
+ int printf();
+ int atoi();
+ int fclose();
+ int sscanf();
+ */
+
+/***********/
+/* program */
+/***********/
+/* ARG_CHECK(st): used inside main()'s option switch, right after an option */
+/* letter that takes a value. It advances *argv to that value, which is     */
+/* either the rest of the current argument ("-e5") or the next argument     */
+/* ("-e 5", in which case argc/argv are stepped forward). When no value is  */
+/* available it reports which one is missing (st) and terminates the        */
+/* program instead of letting the caller parse an empty string.             */
+#define ARG_CHECK(st) if(!(*++(*argv) || (--argc && *++argv))){ \
+fprintf(stderr,"Missing argument: %s\n",st); \
+exit(1); \
+}
+
+
+    char *filename1, *filename2;
+/*------------------------------------------------------------------*/
+/* Entry point: parse the command line, then read the gold file and */
+/* the test file in lockstep (one sentence per line), score every   */
+/* pair with calc_result() and finally print the summary.           */
+/* Returns 0 on normal completion; exits with 1 when the gold or    */
+/* test file name is missing.                                       */
+/*------------------------------------------------------------------*/
+int
+main(argc,argv)
+int argc;
+char *argv[];
+{
+
+    FILE *fd1, *fd2;
+    unsigned char buff[MAX_SENT_LEN];     /* line being read (gold, then test) */
+    unsigned char buff1[MAX_SENT_LEN];    /* copy of the gold line for calc_result() */
+    int quiet=0;                          /* set when -K stopped the loop early */
+
+    filename1=NULL;
+    filename2=NULL;
+
+    /* command-line parsing: options may be bundled; the first two */
+    /* non-option arguments are the gold file and the test file    */
+    for(argc--,argv++;argc>0;argc--,argv++){
+        if(**argv == '-'){
+            while(*++(*argv)){
+                switch(**argv){
+
+                    case 'h':    /* help */
+                        Usage();
+                        exit(1);
+
+                    case 'd':      /* debug mode */
+                        DEBUG = 1;
+                        goto nextarg;
+
+                    case 'D':      /* verbose debug mode */
+                        DEBUG = 2;
+                        goto nextarg;
+
+                    case 'c':      /* cut-off length */
+                        ARG_CHECK("cut-off length for statistices");
+                        TOT_cut_len = atoi(*argv);
+                        fprintf(stderr,"cutoff %d\n",TOT_cut_len);
+                        goto nextarg;
+
+                    case 'e':      /* max error */
+                        ARG_CHECK("number of error to kill");
+                        Max_error = atoi(*argv);
+                        goto nextarg;
+
+                    case 'p':      /* parameter file */
+                        ARG_CHECK("parameter file");
+                        read_parameter_file(*argv);
+                        goto nextarg;
+
+                    case 'K':      /* evaluate only the first n sentences */
+                        ARG_CHECK("Max nb of  sentences to read");
+                        spmrl_max_line_to_read=atoi(*argv);
+                        goto nextarg;
+
+                    case 'L':      /* compact result view (spmrl 2013 extraction rules) */
+                        spmrl_compact_view=1;
+                        goto nextarg;
+
+                    case 'l':      /* compact view, cut-off-length figures */
+                        spmrl_compact_view=1;
+                        spmrl_compact_view40=1;
+                        goto nextarg;
+
+                    case 'X':      /* count skipped sentences (()) as bad sentences */
+                        spmrl_count_bad_sent=1;
+                        goto nextarg;
+
+                    case 'V':      /* print gold/test file names in the output */
+                        spmrl_print_filename=1;
+                        goto nextarg;
+
+                    default:
+                        Usage();
+                        exit(0);
+                }
+            }
+        } else {
+            if(filename1==NULL){
+                filename1 = *argv;
+            }else if(filename2==NULL){
+                filename2 = *argv;
+            }
+        }
+    nextarg: continue;
+    }
+
+    /* both file names are mandatory; fopen(NULL,...) is undefined behaviour */
+    if(filename1==NULL || filename2==NULL){
+        fprintf(stderr,"Missing gold-file or test-file argument\n");
+        Usage();
+        exit(1);
+    }
+
+    init_global();
+
+    if((fd1 = fopen(filename1,"r"))==NULL){
+        Fatal("Can't open gold file (%s)\n",filename1);
+    }
+    if((fd2 = fopen(filename2,"r"))==NULL){
+        Fatal("Can't open test file (%s)\n",filename2);
+    }
+
+    print_head();
+
+    for(Line=1;fgets((char *)buff,MAX_SENT_LEN,fd1)!=NULL;Line++){
+
+        init();
+
+        /* READ 1: gold sentence */
+        r_wn1 = read_line(buff,terminal1,quotterm1,&wn1,bracket1,&bn1);
+
+        /* keep the gold line: buff is reused for the test line below */
+        strcpy((char *)buff1,(char *)buff);
+
+        /* READ 2: test sentence */
+        if(fgets((char *)buff,MAX_SENT_LEN,fd2)==NULL){
+            Error("Number of lines unmatch (too many lines in gold file)\n");
+            break;
+        }
+
+        read_line(buff,terminal2,quotterm2,&wn2,bracket2,&bn2);
+
+        /* Calculate result and print it */
+        calc_result(buff1,buff);
+
+        if(DEBUG>=1){
+            dsp_info();
+        }
+
+        /* -K n: stop once n sentences have been evaluated */
+        if (spmrl_max_line_to_read!=-1){
+            if ((Line+1) > spmrl_max_line_to_read ){
+                quiet=1;   /* leftover test lines are expected, do not report them */
+                break;
+            }
+        }
+
+    }
+
+    if( (quiet==0) && (fgets((char *)buff,MAX_SENT_LEN,fd2)!=NULL)){
+        Error("Number of lines unmatch (too many lines in test file)\n");
+    }
+
+    print_total();
+
+    fclose(fd1);
+    fclose(fd2);
+
+    return (0);
+}
+
+
+/*--------------------------------------------------*/
+/* reset every corpus-level counter to zero:        */
+/* the TOTAL_* set (all sentences) and the TOT40_*  */
+/* set (sentences within the cut-off length)        */
+/*--------------------------------------------------*/
+void
+init_global()
+{
+    /* counters over all sentences */
+    TOTAL_bn1         = 0;
+    TOTAL_bn2         = 0;
+    TOTAL_match       = 0;
+    TOTAL_sent        = 0;
+    TOTAL_error_sent  = 0;
+    TOTAL_skip_sent   = 0;
+    TOTAL_comp_sent   = 0;
+    TOTAL_word        = 0;
+    TOTAL_correct_tag = 0;
+    TOTAL_crossing    = 0;
+    TOTAL_no_crossing = 0;
+    TOTAL_2L_crossing = 0;
+
+    /* counters over sentences within the cut-off length */
+    TOT40_bn1         = 0;
+    TOT40_bn2         = 0;
+    TOT40_match       = 0;
+    TOT40_sent        = 0;
+    TOT40_error_sent  = 0;
+    TOT40_skip_sent   = 0;
+    TOT40_comp_sent   = 0;
+    TOT40_word        = 0;
+    TOT40_correct_tag = 0;
+    TOT40_crossing    = 0;
+    TOT40_no_crossing = 0;
+    TOT40_2L_crossing = 0;
+}
+
+
+/*----------------------------------------------------*/
+/* write the three-line column header of the per-     */
+/* sentence result table to standard output           */
+/*----------------------------------------------------*/
+void
+print_head()
+{
+    static const char *const header[] = {
+        "  Sent.                        Matched  Bracket   Cross        Correct Tag\n",
+        " ID  Len.  Stat. Recal  Prec.  Bracket gold test Bracket Words  Tags Accracy\n",
+        "============================================================================\n"
+    };
+    size_t row;
+
+    for(row=0;row<sizeof(header)/sizeof(header[0]);row++){
+        fputs(header[row],stdout);
+    }
+}
+
+
+/*---------------------------------------------------------*/
+/* per-sentence reset, called before each gold/test pair:  */
+/* clears the word and bracket counts, blanks every slot   */
+/* of the terminal, quote-terminal and bracket tables of   */
+/* both sides and sets Status back to 0                    */
+/*---------------------------------------------------------*/
+void
+init()
+{
+    int w, q, b;
+
+    Status = 0;
+
+    wn1 = wn2 = 0;
+    bn1 = bn2 = 0;
+    r_bn1 = r_bn2 = 0;
+
+    /* terminals: empty word and label, result "undef" */
+    for(w=0;w<MAX_WORD_IN_SENT;w++){
+        s_terminal *gold = &terminal1[w];
+        s_terminal *test = &terminal2[w];
+
+        gold->word[0]  = '\0';
+        gold->label[0] = '\0';
+        gold->result   = 9;
+        test->word[0]  = '\0';
+        test->label[0] = '\0';
+        test->result   = 9;
+    }
+
+    /* quote terminals: same as terminals, plus "no position" markers */
+    for(q=0;q<MAX_QUOTE_TERM;q++){
+        s_term_ind *gold = &quotterm1[q];
+        s_term_ind *test = &quotterm2[q];
+
+        gold->term.word[0]  = '\0';
+        gold->term.label[0] = '\0';
+        gold->term.result   = 9;
+        gold->index         = -1;
+        gold->bracket       = -1;
+        test->term.word[0]  = '\0';
+        test->term.label[0] = '\0';
+        test->term.result   = 9;
+        test->index         = -1;
+        test->bracket       = -1;
+    }
+
+    /* brackets: no span, empty label, result "undef" */
+    for(b=0;b<MAX_BRACKET_IN_SENT;b++){
+        s_bracket *gold = &bracket1[b];
+        s_bracket *test = &bracket2[b];
+
+        gold->start    = -1;
+        gold->end      = -1;
+        gold->label[0] = '\0';
+        gold->result   = 9;
+        test->start    = -1;
+        test->end      = -1;
+        test->label[0] = '\0';
+        test->result   = 9;
+    }
+}
+
+/*----------------------------------------------------------------*/
+/* parameter file                                                 */
+/*   Read `filename` line by line. Lines starting with '#' and    */
+/*   lines shorter than 3 characters are ignored. Every other     */
+/*   line is split at the first run of whitespace into a keyword  */
+/*   and a value, which are handed to set_param(). A keyword      */
+/*   without a value is reported on stderr and skipped.           */
+/*----------------------------------------------------------------*/
+void
+read_parameter_file(filename)
+char *filename;
+{
+    char buff[MAX_LINE_LEN];
+    FILE *fd;
+    int line;
+    int i;
+
+    if((fd=fopen(filename,"r"))==NULL){
+        Fatal("Can't open parameter file (%s)\n",filename);
+        return;   /* only reached if Fatal() returns; never use a NULL stream */
+    }
+
+    for(line=1;fgets(buff,MAX_LINE_LEN,fd)!=NULL;line++){
+
+        /* clean up the tail (newline and trailing blanks) */
+        /* the unsigned char casts keep isspace() defined for bytes >= 0x80 */
+        for(i=(int)strlen(buff)-1;i>0 && isspace((unsigned char)buff[i]);i--){
+            buff[i]='\0';
+        }
+
+        /* skip comment lines and lines too short to hold "K V" */
+        if(buff[0]=='#' || strlen(buff)<3){
+            continue;
+        }
+
+        /* locate the value: step over the keyword, then over the blanks. */
+        /* The keyword scan must stop at the NUL, otherwise a line without */
+        /* any whitespace makes it run past the end of the buffer.         */
+        for(i=0;buff[i]!='\0' && !isspace((unsigned char)buff[i]);i++);
+        for(;isspace((unsigned char)buff[i]);i++);
+        if(buff[i]=='\0'){
+            fprintf(stderr,"Empty value in parameter file (%d)\n",line);
+            continue;   /* nothing meaningful to hand to set_param() */
+        }
+
+        /* set parameter and value */
+        set_param(buff,buff+i);
+    }
+
+    fclose(fd);
+}
+
+
+/* True when `param` starts with the keyword s and the keyword is followed */
+/* by end-of-string or whitespace, so that "DELETE_LABEL" does not match   */
+/* a "DELETE_LABEL_FOR_LENGTH" line.                                       */
+#define STRNCMP(s) (strncmp(param,s,strlen(s))==0 &&  \
+(param[strlen(s)]=='\0' || isspace((unsigned char)param[strlen(s)])))
+
+
+/* Return a heap copy of s; terminates the program when memory runs out. */
+static char *
+dup_value(s)
+char *s;
+{
+    char *copy = (char *)malloc(strlen(s)+1);
+
+    if(copy==NULL){
+        fprintf(stderr,"Out of memory while reading parameter file\n");
+        exit(1);
+    }
+    strcpy(copy,s);
+    return(copy);
+}
+
+/* Return 1 (after a message on stderr) when a table with `max` slots */
+/* already holds n entries, 0 when there is still room.               */
+static int
+table_full(n,max,keyword)
+int n, max;
+char *keyword;
+{
+    if(n<max){
+        return(0);
+    }
+    fprintf(stderr,"Too many %s entries in parameter file (max %d)\n",keyword,max);
+    return(1);
+}
+
+/*------------------------------------------------------------------*/
+/* Apply one parameter-file setting.                                */
+/*   param: the line, starting with the keyword                     */
+/*   value: the text following the keyword and its blanks           */
+/* Scalar keywords overwrite the matching global; list keywords     */
+/* append a heap copy of the value to the matching table. Entries   */
+/* that would overflow a table, and unknown keywords, are reported  */
+/* on stderr and ignored.                                           */
+/*------------------------------------------------------------------*/
+void
+set_param(param,value)
+char *param, *value;
+{
+    if(STRNCMP("DEBUG")){
+
+        DEBUG = atoi(value);
+
+    }else if(STRNCMP("MAX_ERROR")){
+
+        Max_error = atoi(value);
+
+    }else if(STRNCMP("CUTOFF_LEN")){
+
+        TOT_cut_len = atoi(value);
+
+    }else if(STRNCMP("LABELED")){
+
+        F_label = atoi(value);
+
+    }else if(STRNCMP("DELETE_LABEL")){
+
+        if(!table_full(Delete_label_n,MAX_DELETE_LABEL,"DELETE_LABEL")){
+            Delete_label[Delete_label_n++] = dup_value(value);
+        }
+
+    }else if(STRNCMP("DELETE_LABEL_FOR_LENGTH")){
+
+        if(!table_full(Delete_label_for_length_n,MAX_DELETE_LABEL,"DELETE_LABEL_FOR_LENGTH")){
+            Delete_label_for_length[Delete_label_for_length_n++] = dup_value(value);
+        }
+
+    }else if(STRNCMP("QUOTE_LABEL")){
+
+        if(!table_full(Quote_term_n,MAX_QUOTE_TERM,"QUOTE_LABEL")){
+            Quote_term[Quote_term_n++] = dup_value(value);
+        }
+
+    }else if(STRNCMP("EQ_LABEL")){
+
+        if(narg(value)!=2){
+            fprintf(stderr,"EQ_LABEL requires two values\n");
+            return;
+        }
+        if(table_full(EQ_label_n,MAX_EQ_LABEL,"EQ_LABEL")){
+            return;
+        }
+        /* each destination is as large as the whole value, so neither */
+        /* %s conversion can write past its buffer                     */
+        EQ_label[EQ_label_n].s1 = dup_value(value);
+        EQ_label[EQ_label_n].s2 = dup_value(value);
+        sscanf(value,"%s %s",EQ_label[EQ_label_n].s1,EQ_label[EQ_label_n].s2);
+        EQ_label_n++;
+
+    }else if(STRNCMP("EQ_WORD")){
+
+        if(narg(value)!=2){
+            fprintf(stderr,"EQ_WORD requires two values\n");
+            return;
+        }
+        if(table_full(EQ_word_n,MAX_EQ_WORD,"EQ_WORD")){
+            return;
+        }
+        /* same sizing argument as for EQ_LABEL */
+        EQ_word[EQ_word_n].s1 = dup_value(value);
+        EQ_word[EQ_word_n].s2 = dup_value(value);
+        sscanf(value,"%s %s",EQ_word[EQ_word_n].s1,EQ_word[EQ_word_n].s2);
+        EQ_word_n++;
+
+    }else{
+
+        fprintf(stderr,"Unknown keyword (%s) in parameter file\n",param);
+
+    }
+}
+
+
+/*------------------------------------------------------*/
+/* Count the whitespace-separated tokens in string s.   */
+/* Returns 0 for an empty or all-blank string.          */
+/*------------------------------------------------------*/
+int
+narg(s)
+char *s;
+{
+    int n = 0;
+
+    while(*s!='\0'){
+        /* skip the blanks in front of the next token; the unsigned char */
+        /* cast keeps isspace() defined for bytes >= 0x80 (UTF-8 text)   */
+        while(isspace((unsigned char)*s)){
+            s++;
+        }
+        if(*s=='\0'){
+            break;
+        }
+        n++;
+        /* step over the token itself */
+        while(*s!='\0' && !isspace((unsigned char)*s)){
+            s++;
+        }
+    }
+
+    return(n);
+}
+
+/*--------------------------------------------------------------------*/
+/* Read line and gather data.                                         */
+/*   buff     : one bracketed sentence, NUL-terminated                */
+/*   terminal : out, (word,label) of each terminal, in reading order  */
+/*   quotterm : out, terminals accepted by is_quote_term(), with the  */
+/*              word id, bracket id and open-bracket stack at the     */
+/*              time they were read                                   */
+/*   wn, bn   : out, number of terminals / non-terminal brackets      */
+/*   bracket  : out, label, word span and buffer offsets per bracket  */
+/* Return length of sentence: the number of terminals whose label is  */
+/* not rejected by is_deletelabel_for_length().                       */
+/*                                                                    */
+/* Over-long labels and words are truncated, and a sentence with more */
+/* words, brackets or quote terminals than the tables can hold stops  */
+/* being recorded; each case is reported through Error().             */
+/* NOTE(review): Error() is defined elsewhere; it is assumed to flag  */
+/* the current sentence as erroneous -- confirm.                      */
+/*--------------------------------------------------------------------*/
+int
+read_line(buff, terminal, quotterm, wn, bracket, bn)
+char *buff;
+s_terminal terminal[];
+s_term_ind quotterm[];
+int *wn;
+s_bracket bracket[];
+int *bn;
+{
+    char *p, *q, label[MAX_LABEL_LEN], word[MAX_WORD_LEN];
+    int   qt;             /* quote term counter */
+    int   wid, bid;       /* word ID, bracket ID */
+    int   b;              /* temporary remembering bid */
+    int   i;
+    int   len;            /* length of the sentence */
+    int   truncated;      /* set when a label/word did not fit its buffer */
+    int   overflow;       /* set when a table is full and reading stops */
+    int   depth;          /* number of stack entries copied for a quote term */
+
+    len = 0;
+    overflow = 0;
+    stack_top=0;
+
+    for(p=buff,qt=0,wid=0,bid=0;*p!='\0';){
+
+        /* the unsigned char casts keep isspace() defined for bytes >= 0x80 */
+        if(isspace((unsigned char)*p)){
+            p++;
+            continue;
+
+            /* open bracket */
+            /*--------------*/
+        }else if(*p=='('){
+
+            /* label: everything up to the next terminator. The NUL is    */
+            /* tested explicitly because is_terminator() is defined       */
+            /* elsewhere and may not treat it as a terminator.            */
+            truncated = 0;
+            for(p++,i=0;*p!='\0' && !is_terminator(*p);p++){
+                if(i<MAX_LABEL_LEN-1){
+                    label[i++]=*p;
+                }else{
+                    truncated = 1;
+                }
+            }
+            label[i]='\0';
+            if(truncated){
+                Error("Label too long (truncated)\n");
+            }
+
+            /* Find terminals: "(label word)" */
+            q = p;
+            if(isspace((unsigned char)*q)){
+                for(q++;isspace((unsigned char)*q);q++);
+                truncated = 0;
+                for(i=0;*q!='\0' && !is_terminator(*q);q++){
+                    if(i<MAX_WORD_LEN-1){
+                        word[i++]=*q;
+                    }else{
+                        truncated = 1;
+                    }
+                }
+                word[i]='\0';
+                if(truncated){
+                    Error("Word too long (truncated)\n");
+                }
+
+                /* compute length */
+                if(*q==')' && !is_deletelabel_for_length(label)){
+                    len++;
+                }
+                if (DEBUG>1)
+                    printf("label=%s, word=%s, wid=%d\n",label,word,wid);
+
+                /* quote terminal: remember it with the current parser state */
+                if(*q==')' && is_quote_term(label,word)==1){
+                    if(qt>=MAX_QUOTE_TERM){
+                        Error("Too many quote terminals in a sentence\n");
+                    }else{
+                        /* never copy more than either array can hold */
+                        depth = stack_top;
+                        if(depth<0){
+                            depth = 0;
+                        }else if(depth>MAX_BRACKET_IN_SENT){
+                            depth = MAX_BRACKET_IN_SENT;
+                        }
+                        strcpy(quotterm[qt].term.word,word);
+                        strcpy(quotterm[qt].term.label,label);
+                        quotterm[qt].index = wid;
+                        quotterm[qt].bracket = bid;
+                        quotterm[qt].endslen = depth;
+                        memcpy(quotterm[qt].ends,stack,depth*sizeof(int));
+                        qt++;
+                    }
+                }
+
+                /* Slav: terminals are no longer deleted here, even when */
+                /* their label is a DELETE_LABEL                         */
+
+                /* valid terminal */
+                if(*q==')'){
+                    if(wid>=MAX_WORD_IN_SENT){
+                        Error("Too many words in a sentence\n");
+                        overflow = 1;
+                        break;
+                    }
+                    strcpy(terminal[wid].word,word);
+                    strcpy(terminal[wid].label,label);
+                    wid++;
+                    p = q+1;
+                    continue;
+
+                    /* error */
+                }else if(*q!='('){
+                    fprintf(stderr,"debug djam: q= %s\n",q);
+                    Error("More than two elements in a bracket\n");
+                }
+            }
+
+            /* otherwise non-terminal label */
+            if(bid>=MAX_BRACKET_IN_SENT){
+                Error("Too many brackets in a sentence\n");
+                overflow = 1;
+                break;
+            }
+            bracket[bid].start = wid;
+            bracket[bid].buf_start = (unsigned int)(p-buff);
+            strcpy(bracket[bid].label,label);
+            pushb(bid);
+            bid++;
+
+            /* close bracket */
+            /*---------------*/
+        }else if(*p==')'){
+
+            b = popb();
+            /* ignore an id that cannot index the bracket table */
+            if(b>=0 && b<MAX_BRACKET_IN_SENT){
+                bracket[b].end = wid;
+                bracket[b].buf_end = (unsigned int)(p-buff);
+            }
+            p++;
+
+            /* error */
+            /*-------*/
+        }else{
+
+            Error("Reading sentence\n");
+            /* step over the stray token; without this p never moves and */
+            /* the same character is reported again and again            */
+            do{
+                p++;
+            }while(*p!='\0' && *p!='(' && *p!=')' && !isspace((unsigned char)*p));
+        }
+    }
+
+    /* after an overflow the stack is necessarily non-empty: one report is enough */
+    if(!overflow && !stackempty()){
+        Error("Bracketing is unbalanced (too many open bracket)\n");
+    }
+
+    *wn = wid;
+    *bn = bid;
+
+    return(len);
+}
+
+
+/*--------------------------------------------------*/
+/* stack operations for bracketing pairs            */
+/* pushb: store bracket ID `item` on top of `stack` */
+void
+pushb(item)
+int item;
+{
+    stack[stack_top] = item;    /* no bounds check is done here */
+    stack_top++;
+}
+
+int
+popb()
+{
+    /* Pop the most recently pushed bracket ID.  On an empty stack, report the */
+    /* error and return 0 without reading stack[-1] or moving stack_top.       */
+    if(stack_top <= 0){
+		Error("Bracketing unbalance (too many close bracket)\n");
+		return(0);
+    }
+    stack_top--;
+    return(stack[stack_top]);
+}
+
+int
+stackempty()
+{
+    /* 1 when no open bracket is pending on the stack, 0 otherwise. */
+    /* (stack_top == 0) already evaluates to exactly 1 or 0 in C.   */
+    int is_empty = (stack_top == 0);
+
+    return(is_empty);
+}
+
+
+/*-----------------------------------------------------------------------*/
+/* calculate result: score the current test sentence (globals *2)        */
+/* against the gold one (globals *1) and report via individual_result()  */
+void
+calc_result(unsigned char *buf1,unsigned char *buf)
+{
+    int i, j, l;
+    int match, crossing, correct_tag;
+    /* last_i: gold bracket most recently dumped as CROSSING (debug output only) */
+    int last_i = -1;
+    /* my_buf: scratch copy of a bracket's text; NOTE(review): the strncpy calls below do not check l against its size */
+    char my_buf[10000]; //djame: was 1000
+    int match_found = 0;
+    /* match_j[j]: test bracket j matched a gold span but not its label; NOTE(review): assumes bn2 <= 2000 */
+    char match_j[2000]; //djame was : 200
+    for (j = 0; j < bn2; ++j) {
+		match_j[j] = 0;
+    }
+
+    /* ML */
+    if (DEBUG>1)
+    	printf("\n");
+
+    /* wn2==0: no test words; either count the gold brackets as missed (Status 3) or skip (Status 2) */
+    /* Find skip and error */
+    /*---------------------*/
+    if(wn2==0){ // Djame: case of empty lines
+		if (spmrl_count_bad_sent==1){
+			Status = 3; 
+			//individual_result(wn1,r_bn1,r_bn2,match,crossing,correct_tag);
+			int n_bracket_gold=massage_data_gold_only();
+			r_bn1=n_bracket_gold;
+			individual_result(wn1,n_bracket_gold,0,0,0,0); // testing the case of missing analysis was 0,0
+		}else {
+			Status=2;
+			individual_result(0,0,0,0,0,0);
+		}
+
+		return;
+    }
+    /* word counts differ: fix_quote() may re-insert quote terminals and change wn1/wn2 */
+	if(wn1 != wn2){
+		//if (DEBUG>1)
+		//Error("Length unmatch (%d|%d)\n",wn1,wn2);
+		fix_quote();
+		if(wn1 != wn2){
+			individual_result(0,0,0,0,0,0); 
+			/* Slav: ignore 1 word sentences; NOTE(review): for wn1<=1 execution continues and individual_result() runs a second time below */
+			if (wn1 > 1) {
+				Error("Length unmatch (%d|%d)\n",wn1,wn2);
+				return;
+			}
+		}
+    }
+    /* every word pair must agree; word_comp() also accepts the EQ_word pairs */
+    for(i=0;i<wn1;i++){
+		if(word_comp(terminal1[i].word,terminal2[i].word)==0){
+			Error("Words unmatch (%s|%s)\n",terminal1[i].word,
+				  terminal2[i].word);
+			individual_result(0,0,0,0,0,0);
+			return;
+		}
+    }
+
+    /* massage the data */
+    /*------------------*/
+    massage_data();
+
+    /* matching brackets */
+    /*-------------------*/
+    match = 0;
+    for(i=0;i<bn1;i++){
+		for(j=0;j<bn2;j++){
+			
+			if (DEBUG>1)
+				printf("1.res=%d, 2.res=%d, 1.start=%d, 2.start=%d, 1.end=%d, 2.end=%d\n",bracket1[i].result,bracket2[j].result,bracket1[i].start,bracket2[j].start,bracket1[i].end,bracket2[j].end);
+			/* result codes: 0 = not matched yet, 1 = matched, 5 = excluded by massage_data() */
+			// does bracket match?
+			if(bracket1[i].result != 5 && 
+			   bracket2[j].result == 0 &&
+			   bracket1[i].start == bracket2[j].start && bracket1[i].end == bracket2[j].end) {
+				
+				// (1) do we not care about the label or (2) does the label match?
+				if (F_label==0 || label_comp(bracket1[i].label,bracket2[j].label)==1) {
+					bracket1[i].result = bracket2[j].result = 1;
+					match++;
+					match_found = 1;
+					break;
+				} else {
+					if (DEBUG>1) {
+						printf("  LABEL[%d-%d]: ",bracket1[i].start,bracket1[i].end-1);
+						l = bracket1[i].buf_end-bracket1[i].buf_start;
+						strncpy(my_buf,buf1+bracket1[i].buf_start,l);
+						my_buf[l] = '\0';
+						printf("%s\n",my_buf);
+					}
+					match_found = 1;
+					match_j[j] = 1;
+				}
+			}
+		}
+		
+		if (!match_found && bracket1[i].result != 5 && DEBUG>1) {
+			/* ### ML 09/28/03: gold bracket with no corresponding test bracket */
+			printf("  BRACKET[%d-%d]: ",bracket1[i].start,bracket1[i].end-1);
+			l = bracket1[i].buf_end-bracket1[i].buf_start;
+			strncpy(my_buf,buf1+bracket1[i].buf_start,l);
+			my_buf[l] = '\0';
+			printf("%s\n",my_buf);
+		}
+		match_found = 0;
+    }
+
+    for(j=0;j<bn2;j++){
+		if (bracket2[j].result==0 && !match_j[j] && DEBUG>1) {
+			/* test bracket with no corresponding gold bracket */
+			printf("  EXTRA[%d-%d]: ",bracket2[j].start,bracket2[j].end-1);
+			l = bracket2[j].buf_end-bracket2[j].buf_start;
+			strncpy(my_buf,buf+bracket2[j].buf_start,l);
+			my_buf[l] = '\0';
+			printf("%s\n",my_buf);
+		}
+    }
+
+    /* crossing */
+    /*----------*/
+    crossing = 0;
+    /* each test bracket counts at most once: the inner loop stops at its first crossing gold bracket */
+    /* crossing is counted based on the brackets */
+    /* in test rather than gold file (by Mike)   */
+    for(j=0;j<bn2;j++){
+		for(i=0;i<bn1;i++){
+			if(bracket1[i].result != 5 &&
+			   bracket2[j].result != 5 &&
+			   ((bracket1[i].start < bracket2[j].start &&
+				 bracket1[i].end   > bracket2[j].start &&
+				 bracket1[i].end   < bracket2[j].end) ||
+				(bracket1[i].start > bracket2[j].start &&
+				 bracket1[i].start < bracket2[j].end &&
+				 bracket1[i].end   > bracket2[j].end))){
+					
+					/* ### ML 09/01/03: get details on cross-brackettings */
+					if (i != last_i) {
+						if (DEBUG>1) {
+							printf("  CROSSING[%d-%d]: ",bracket1[i].start,bracket1[i].end-1);
+							l = bracket1[i].buf_end-bracket1[i].buf_start;
+							strncpy(my_buf,buf1+bracket1[i].buf_start,l);
+							my_buf[l] = '\0';
+							printf("%s\n",my_buf);
+							
+							/* ML
+							 printf("\n  CROSSING at bracket %d:\n",i-1);
+							 printf("  GOLD (tokens %d-%d): ",bracket1[i].start,bracket1[i].end-1);
+							 l = bracket1[i].buf_end-bracket1[i].buf_start;
+							 strncpy(my_buf,buf1+bracket1[i].buf_start,l);
+							 my_buf[l] = '\0';
+							 printf("%s\n",my_buf);
+							 */
+						}
+						last_i = i;
+					}
+					
+					/* ML
+					 printf("  TEST (tokens %d-%d): ",bracket2[j].start,bracket2[j].end-1);
+					 l = bracket2[j].buf_end-bracket2[j].buf_start;
+					 strncpy(my_buf,buf+bracket2[j].buf_start,l);
+					 my_buf[l] = '\0';
+					 printf("%s\n",my_buf);
+					 */
+					
+					crossing++;
+					break;
+				}
+		}
+    }
+
+    /* Tagging accuracy */
+    /*------------------*/
+    correct_tag=0;
+    for(i=0;i<wn1;i++){
+		if(label_comp(terminal1[i].label,terminal2[i].label)==1){
+			terminal1[i].result = terminal2[i].result = 1;
+			correct_tag++;
+		} else {
+			terminal1[i].result = terminal2[i].result = 0;
+		}
+    }
+    /* r_bn1 / r_bn2 (brackets left after deletion) were set by massage_data() above */
+    individual_result(wn1,r_bn1,r_bn2,match,crossing,correct_tag);
+}
+/* fix_quote: for quote terminals recorded at the same word index in gold (1) and test (2) with different labels, reinsert_term() the one whose label is a delete label */
+void
+fix_quote()
+{
+	int i,j,k;
+	if (DEBUG>1) {	/* dump every recorded quote terminal of both sides */
+		for(i=0;i<MAX_QUOTE_TERM;i++){
+			if (quotterm1[i].index!=-1)
+				printf("%d: %s - %s\n",quotterm1[i].index,
+					   quotterm1[i].term.label,
+					   quotterm1[i].term.word);
+			if (quotterm2[i].index!=-1)
+				printf("%d: %s - %s\n",quotterm2[i].index,
+					   quotterm2[i].term.label,
+					   quotterm2[i].term.word);
+		}
+	}
+	for(i=0;i<MAX_QUOTE_TERM;i++) {
+		int ind = quotterm2[i].index;	/* word index of the i-th test quote terminal, -1 if the slot is unused */
+		if (ind!=-1) {
+			for(j=0;j<MAX_QUOTE_TERM;j++){
+				if (quotterm1[j].index==ind &&
+					strcmp(quotterm1[j].term.label,
+						   quotterm2[i].term.label)!=0) {
+						if (is_deletelabel(quotterm1[j].term.label) && !is_deletelabel(quotterm2[i].term.label)) {
+							reinsert_term(&quotterm1[j],terminal1,bracket1,&wn1);
+							for (k=j;k<MAX_QUOTE_TERM;k++)	/* bump the index of gold entries j.. by one */
+								if (quotterm1[k].index!=-1)
+									quotterm1[k].index++;
+						} else if (is_deletelabel(quotterm2[i].term.label) && !is_deletelabel(quotterm1[j].term.label)) {
+							reinsert_term(&quotterm2[i],terminal2,bracket2,&wn2);
+							for (k=i;k<MAX_QUOTE_TERM;k++)	/* bump the index of test entries i.. by one */
+								if (quotterm2[k].index!=-1)
+									quotterm2[k].index++;
+						}
+					}
+			}
+		} else break;	/* stop at the first unused test slot */
+	}
+}
+/* reinsert_term: insert quot->term into terminal[] at quot->index, increment *wn, and shift the word spans stored in bracket[] */
+void
+reinsert_term(quot,terminal,bracket,wn)
+s_term_ind* quot;
+s_terminal terminal[];
+s_bracket bracket[];
+int* wn;
+{
+	int ind = quot->index;
+	int bra = quot->bracket;
+	s_terminal* term = &quot->term;
+	int k;
+	memmove(&terminal[ind+1],	/* open a gap at ind: move entries ind..MAX_WORD_IN_SENT-2 up by one */
+			&terminal[ind],
+			sizeof(s_terminal)*(MAX_WORD_IN_SENT-ind-1));
+	strcpy(terminal[ind].label,term->label);
+	strcpy(terminal[ind].word,term->word);
+	(*wn)++;
+	if (DEBUG>1)
+		printf("bra=%d, ind=%d\n",bra,ind);
+	for(k=0;k<MAX_BRACKET_IN_SENT;k++) {
+		if (bracket[k].start==-1)	/* the loop treats start == -1 as the end of the list */
+			break;
+		if (DEBUG>1)
+			printf("bracket[%d]={%d,%d}\n",k,bracket[k].start,bracket[k].end);
+		if (k>=bra) {	/* brackets with an ID of at least quot->bracket move right by one word */
+			bracket[k].start++;
+			bracket[k].end++;
+		}
+		//if (bracket[k].start<=ind && bracket[k].end>=ind)
+		//bracket[k].end++;
+	}
+	if (DEBUG>1)
+		printf("endslen=%d\n",quot->endslen);
+	for(k=0;k<quot->endslen;k++) {	/* brackets listed in quot->ends get their end extended by one */
+		//printf("ends[%d]=%d",k,quot->ends[k]);
+		bracket[quot->ends[k]].end++;
+	}
+	//free(quot->ends);
+}
+/*
+ void
+ adjust_end(ind,bra)
+ int ind;
+ int bra;
+ {
+ for(k=0;k<MAX_BRACKET_IN_SENT;k++) {
+ if (bracket[k].start==-1)
+ break;
+ printf("bracket[%d]={%d,%d}\n",k,bracket[k].start,bracket[k].end);
+ if (k>=bra)
+ bracket[k].end++;
+ }
+ }
+ */
+
+
+
+int massage_data_gold_only(){
+	/* Count the gold brackets that massage_data() would keep (non-empty span,
+	 * label not in Delete_label) without needing a test parse.  calc_result()
+	 * uses it when the test side is empty so the gold brackets still count.
+	 * Side effect: resets bracket1[i].result to 0.  Returns that count. */
+	int i, j;
+	int gold_valid_bracket=0;
+	int deleted;
+	char buflabel[MAX_LABEL_LEN]; /* scratch copy: keeps bracket1[i].label untrimmed */
+
+	for(i=0;i<bn1;i++){
+		bracket1[i].result = 0;
+		/* Zero element: never counted */
+		if(bracket1[i].start == bracket1[i].end){
+			continue;
+		}
+		/* Compare a trimmed copy of the label against every delete label */
+		strcpy(buflabel,bracket1[i].label);
+		modify_label(buflabel);
+		deleted = 0;
+		for(j=0;j<Delete_label_n;j++){
+			if(label_comp(buflabel,Delete_label[j])==1){
+				deleted = 1;
+			}
+		}
+		/* Count each surviving bracket exactly once (the old code added one
+		 * more per non-matching delete label, inflating the gold total) */
+		if(!deleted){
+			gold_valid_bracket++;
+		}
+	}
+	return gold_valid_bracket;
+}
+
+
+
+
+
+void
+massage_data()
+{
+    /* Prepare both bracket lists for scoring:                          */
+    /*   - every bracket gets result 0, or 5 when it has to be ignored  */
+    /*     (it spans no word, or its trimmed label is in Delete_label); */
+    /*   - r_bn1 / r_bn2 receive the number of brackets not marked 5.   */
+    /* Suffix 1 is the gold side, suffix 2 the test side.               */
+    int b, d;
+
+    /*------------------*/
+    /* GOLD: bracket1[] */
+    /*------------------*/
+    for(b=0;b<bn1;b++){
+		if(bracket1[b].start != bracket1[b].end){
+			bracket1[b].result = 0;
+
+			/* trim the label in place before comparing it */
+			modify_label(bracket1[b].label);
+
+			/* a label listed in Delete_label[] removes the bracket */
+			for(d=0;d<Delete_label_n;d++){
+				if(label_comp(bracket1[b].label,Delete_label[d])==1){
+					bracket1[b].result = 5;
+				}
+			}
+		}else{
+			/* zero element: start and end coincide */
+			bracket1[b].result = 5;
+		}
+    }
+
+    /*------------------*/
+    /* TEST: bracket2[] */
+    /*------------------*/
+    for(b=0;b<bn2;b++){
+		if(bracket2[b].start != bracket2[b].end){
+			bracket2[b].result = 0;
+
+			/* trim the label in place before comparing it */
+			modify_label(bracket2[b].label);
+
+			/* a label listed in Delete_label[] removes the bracket */
+			for(d=0;d<Delete_label_n;d++){
+				if(label_comp(bracket2[b].label,Delete_label[d])==1){
+					bracket2[b].result = 5;
+				}
+			}
+		}else{
+			/* zero element: start and end coincide */
+			bracket2[b].result = 5;
+		}
+    }
+
+    /*---------------------------------------------------------*/
+    /* count up real number of brackets (exclude deleted ones) */
+    /*---------------------------------------------------------*/
+
+    r_bn1 = 0;
+    for(b=0;b<bn1;b++){
+		r_bn1 += (bracket1[b].result != 5);
+    }
+
+    r_bn2 = 0;
+    for(b=0;b<bn2;b++){
+		r_bn2 += (bracket2[b].result != 5);
+    }
+}
+
+
+/*-------------------------------------------------------------*/
+/* trim the tail of label: cut at the first '-', '=' or '#'    */
+/*-------------------------------------------------------------*/
+void
+modify_label(label)
+char *label;
+{
+    char *cut = label;
+
+    /* find the first separator ('#' is for dealing with morph features) */
+    while(*cut!='\0' && *cut!='-' && *cut!='=' && *cut!='#'){
+		cut++;
+    }
+    /* truncate in place; a label without separator is left untouched */
+    if(*cut!='\0') *cut='\0';
+}
+
+
+/*-----------------------------------------------*/
+/* add individual statistics to TOTAL statistics */
+/*-----------------------------------------------*/
+void
+individual_result(wn1,bn1,bn2,match,crossing,correct_tag)
+int wn1,bn1,bn2,match,crossing,correct_tag;
+{
+    /* Folds one sentence into the running totals and prints its result row. */
+    /* The parameters wn1, bn1 and bn2 hide the globals of the same name;    */
+    /* Status, Line, r_wn1, r_bn1 and r_bn2 are read from the globals.       */
+    int    complete, no_cross, few_cross;
+    double recall, precision, tag_acc;
+
+    complete  = (bn1==bn2 && bn2==match);  /* every bracket on both sides matched */
+    no_cross  = (crossing==0);
+    few_cross = (crossing<=2);
+
+    /* Statistics for ALL */
+    TOTAL_sent++;
+    switch(Status){
+    case 1:                 /* error sentence */
+		TOTAL_error_sent++;
+		break;
+    case 2:                 /* skipped sentence */
+		TOTAL_skip_sent++;
+		break;
+    default:
+		TOTAL_bn1 += bn1;
+		TOTAL_bn2 += bn2;
+		TOTAL_match += match;
+		TOTAL_comp_sent += complete;
+		TOTAL_word += wn1;
+		TOTAL_crossing += crossing;
+		TOTAL_no_crossing += no_cross;
+		TOTAL_2L_crossing += few_cross;
+		TOTAL_correct_tag += correct_tag;
+		break;
+    }
+
+    /* Statistics for sent length <= TOT_cut_len */
+    if(r_wn1<=TOT_cut_len){
+		TOT40_sent++;
+		switch(Status){
+		case 1:
+			TOT40_error_sent++;
+			break;
+		case 2:
+			TOT40_skip_sent++;
+			break;
+		default:
+			TOT40_bn1 += bn1;
+			TOT40_bn2 += bn2;
+			TOT40_match += match;
+			TOT40_comp_sent += complete;
+			TOT40_word += wn1;
+			TOT40_crossing += crossing;
+			TOT40_no_crossing += no_cross;
+			TOT40_2L_crossing += few_cross;
+			TOT40_correct_tag += correct_tag;
+			break;
+		}
+    }
+
+    /* Print individual result */
+    /*-------------------------*/
+    recall    = (r_bn1==0?0.0:100.0*match/r_bn1);
+    precision = (r_bn2==0?0.0:100.0*match/r_bn2);
+    tag_acc   = (wn1==0?0.0:100.0*correct_tag/wn1);
+
+    printf("%4d  %3d    %d  ",Line,r_wn1,Status);
+    printf("%6.2f %6.2f   %3d    %3d  %3d    %3d",
+		   recall, precision, match, r_bn1, r_bn2, crossing);
+    printf("   %4d  %4d   %6.2f\n",wn1,correct_tag,tag_acc);
+}
+
+
+/*--------------------------------------------------------------------*/
+/* print total statistics (full report, or one line in compact view)  */
+/*--------------------------------------------------------------------*/
+void
+print_total()
+{
+    int sentn;
+    double r,p,f;
+    FILE *file;
+	/* r = bracketing recall (matches / gold brackets), p = precision
+	 * (matches / test brackets), f = their harmonic mean, all in percent.
+	 * f is forced to 0 when p+r is 0 so that no NaN is ever printed. */
+	r = TOTAL_bn1>0 ? 100.0*TOTAL_match/TOTAL_bn1 : 0.0;
+	p = TOTAL_bn2>0 ? 100.0*TOTAL_match/TOTAL_bn2 : 0.0;
+	f = (p+r)>0 ? 2*p*r/(p+r) : 0.0;
+
+	if (spmrl_compact_view == 0){
+
+		printf("============================================================================\n");
+
+		if(TOTAL_bn1>0 && TOTAL_bn2>0){
+			printf("                %6.2f %6.2f %6d %5d %5d  %5d",
+				   (TOTAL_bn1>0?100.0*TOTAL_match/TOTAL_bn1:0.0),
+				   (TOTAL_bn2>0?100.0*TOTAL_match/TOTAL_bn2:0.0),
+				   TOTAL_match, 
+				   TOTAL_bn1, 
+				   TOTAL_bn2,
+				   TOTAL_crossing);
+		}
+
+		printf("  %5d %5d   %6.2f",
+			   TOTAL_word,
+			   TOTAL_correct_tag,
+			   (TOTAL_word>0?100.0*TOTAL_correct_tag/TOTAL_word:0.0));
+
+		printf("\n");
+		if (spmrl_print_filename==0){
+			printf("=== Summary ===\n");
+		}else {
+			printf("=== Summary: %s\tvs\t%s ===\n",filename1,filename2);
+		}
+
+
+		sentn = TOTAL_sent - TOTAL_error_sent - TOTAL_skip_sent;
+
+		printf("\n-- All --\n");
+		printf("Number of sentence        = %6d\n",TOTAL_sent);
+		printf("Number of Error sentence  = %6d\n",TOTAL_error_sent);
+		printf("Number of Skip  sentence  = %6d\n",TOTAL_skip_sent);
+		printf("Number of Valid sentence  = %6d\n",sentn);
+
+		printf("Bracketing Recall         = %6.2f\n",r);
+		printf("Bracketing Precision      = %6.2f\n",p);
+		printf("Bracketing FMeasure       = %6.2f\n",f);
+
+		printf("Complete match            = %6.2f\n",
+			   (sentn>0?100.0*TOTAL_comp_sent/sentn:0.0));
+		printf("Average crossing          = %6.2f\n",
+			   (sentn>0?1.0*TOTAL_crossing/sentn:0.0));
+		printf("No crossing               = %6.2f\n",
+			   (sentn>0?100.0*TOTAL_no_crossing/sentn:0.0));
+		printf("2 or less crossing        = %6.2f\n",
+			   (sentn>0?100.0*TOTAL_2L_crossing/sentn:0.0));
+		printf("Tagging accuracy          = %6.2f\n",
+			   (TOTAL_word>0?100.0*TOTAL_correct_tag/TOTAL_word:0.0));
+
+		// Write stats also to a file.  Failing to open it is reported on
+		// stderr but is not fatal: the report on stdout is still complete.
+		file = fopen("status", "w");
+		if (file == NULL) {
+			fprintf(stderr, "Cannot open file \"status\" for writing\n");
+		} else {
+			fprintf(file, "---\n");
+			fprintf(file, "F1: %.2f\n", f);
+			fprintf(file, "LP: %.2f\n", p);
+			fprintf(file, "LR: %.2f\n", r);
+			fprintf(file, "POS: %.2f\n",
+					(TOTAL_word>0?100.0*TOTAL_correct_tag/TOTAL_word:0.0));
+			fprintf(file, "errorRate: %.2f\n", 100-f);
+			fclose(file);
+		}
+
+		sentn = TOT40_sent - TOT40_error_sent - TOT40_skip_sent;
+
+		printf("\n-- len<=%d --\n",TOT_cut_len);
+		printf("Number of sentence        = %6d\n",TOT40_sent);
+		printf("Number of Error sentence  = %6d\n",TOT40_error_sent);
+		printf("Number of Skip  sentence  = %6d\n",TOT40_skip_sent);
+		printf("Number of Valid sentence  = %6d\n",sentn);
+
+
+		r = TOT40_bn1>0 ? 100.0*TOT40_match/TOT40_bn1 : 0.0;
+		printf("Bracketing Recall         = %6.2f\n",r);
+
+		p = TOT40_bn2>0 ? 100.0*TOT40_match/TOT40_bn2 : 0.0;
+		printf("Bracketing Precision      = %6.2f\n",p);
+
+		f = (p+r)>0 ? 2*p*r/(p+r) : 0.0;
+		printf("Bracketing FMeasure       = %6.2f\n",f);
+
+		printf("Complete match            = %6.2f\n",
+			   (sentn>0?100.0*TOT40_comp_sent/sentn:0.0));
+		printf("Average crossing          = %6.2f\n",
+			   (sentn>0?1.0*TOT40_crossing/sentn:0.0));
+		printf("No crossing               = %6.2f\n",
+			   (sentn>0?100.0*TOT40_no_crossing/sentn:0.0));
+		printf("2 or less crossing        = %6.2f\n",
+			   (sentn>0?100.0*TOT40_2L_crossing/sentn:0.0));
+		printf("Tagging accuracy          = %6.2f\n",
+			   (TOT40_word>0?100.0*TOT40_correct_tag/TOT40_word:0.0));
+	}else { // else spmrl_compact_view
+		if (spmrl_compact_view40 ==0){
+			double pos=(TOTAL_word>0?100.0*TOTAL_correct_tag/TOTAL_word:0.0);
+			sentn = TOTAL_sent - TOTAL_error_sent - TOTAL_skip_sent;
+
+			double EX=(sentn>0?100.0*TOTAL_comp_sent/sentn:0.0);
+			/* p is the precision and r the recall: pass them in the order of the labels */
+			printf("F1: %6.2f %%\tPrec: %6.2f %%\tRec: %6.2f %%\t",f,p,r);
+			printf("POS: %6.2f %%\tEX: %6.2f %%\tUnparsed: %6d\tSent: %6d\tfile: %s\n",pos,EX,TOTAL_skip_sent+TOTAL_error_sent,TOTAL_sent,filename2);
+		}else {
+
+			r = TOT40_bn1>0 ? 100.0*TOT40_match/TOT40_bn1 : 0.0;
+			p = TOT40_bn2>0 ? 100.0*TOT40_match/TOT40_bn2 : 0.0;
+			f = (p+r)>0 ? 2*p*r/(p+r) : 0.0;
+			double pos=(TOT40_word>0?100.0*TOT40_correct_tag/TOT40_word:0.0);
+			sentn = TOT40_sent - TOT40_error_sent - TOT40_skip_sent;
+			double EX=(sentn>0?100.0*TOT40_comp_sent/sentn:0.0);
+
+			printf("F1: %6.2f %%\tPrec: %6.2f %%\tRec: %6.2f %%\t",f,p,r);
+			printf("POS: %6.2f %%\tEX: %6.2f %%\tUnparsed: %6d\tSent: %6d\tfile: %s\n",pos,EX,TOT40_skip_sent+TOT40_error_sent,TOT40_sent,filename2);
+		}
+
+	}
+
+}
+
+
+/*--------------------------------*/
+/* display individual information */
+/*--------------------------------*/
+void
+dsp_info()
+{
+	/* Dumps the terminals, then the brackets, of sentence 1 and sentence 2 */
+	/* side by side on stdout, one index per row.                           */
+	int row, rows;
+
+	printf("-<1>---(wn1=%3d, bn1=%3d)-           ",wn1,bn1);
+	printf("-<2>---(wn2=%3d, bn2=%3d)-\n",wn2,bn2);
+
+	/* terminals: as many rows as the longer sentence has words */
+	rows = (wn1>wn2?wn1:wn2);
+	for(row=0;row<rows;row++){
+		/* left column, blank-padded when the slot holds no word */
+		if(terminal1[row].word[0]=='\0'){
+			printf("                                        ");
+		}else{
+			printf("%3d : %d : %-6s  %-16s      ",row,terminal1[row].result,
+				   terminal1[row].label,terminal1[row].word);
+		}
+		/* right column; it always ends the row */
+		if(terminal2[row].word[0]=='\0'){
+			printf("\n");
+		}else{
+			printf("%3d : %d : %-6s  %-16s\n",row,terminal2[row].result,
+				   terminal2[row].label,terminal2[row].word);
+		}
+	}
+	printf("\n");
+
+	/* brackets: a start of -1 is printed as an empty cell (presumably an unused slot) */
+	rows = (bn1>bn2?bn1:bn2);
+	for(row=0;row<rows;row++){
+		if(bracket1[row].start == -1){
+			printf("                                ");
+		}else{
+			printf("%3d : %d : %3d  %3d  %-6s      ",row,bracket1[row].result,
+				   bracket1[row].start,bracket1[row].end,
+				   bracket1[row].label);
+		}
+		if(bracket2[row].start == -1){
+			printf("\n");
+		}else{
+			printf("%3d : %d : %3d  %3d  %-6s\n",row,bracket2[row].result,
+				   bracket2[row].start,bracket2[row].end,
+				   bracket2[row].label);
+		}
+	}
+	printf("\n");
+	printf("========\n");
+}
+
+
+/*-----------------*/
+/* some predicates */
+/*-----------------*/
+
+
+// Locale-independent replacement for isspace(): 1 for ' ' and "\t\n\v\f\r", else 0
+// (Djamé: written while digging a bug in spmrl 2013 arabic gold dev line 616)
+int my_isspace(char c){
+	switch(c){ case ' ': case '\t': case '\n': case '\v': case '\f': case '\r': return 1; }
+	return 0;
+}
+
+
+
+
+int
+is_terminator(c)
+char c;
+{
+    /* 1 if c ends a label/word token (whitespace or a parenthesis), else 0.  */
+    /* The cast is required: passing a negative char (any non-ASCII byte when */
+    /* char is signed) to isspace() is undefined behaviour.                   */
+    if(isspace((unsigned char)c)) return(1);
+    return(c=='(' || c==')');
+}
+
+int
+is_deletelabel(s)
+char *s;
+{
+    /* 1 when s is exactly one of the Delete_label[] entries, else 0 */
+    int found = 0;
+    int k = 0;
+
+    while(!found && k<Delete_label_n){
+		found = (strcmp(s,Delete_label[k])==0);
+		k++;
+    }
+    return(found);
+}
+
+int
+is_deletelabel_for_length(s)
+char *s;
+{
+    /* 1 when s is exactly one of the Delete_label_for_length[] entries, else 0 */
+    int found = 0;
+    int k = 0;
+
+    while(!found && k<Delete_label_for_length_n){
+		found = (strcmp(s,Delete_label_for_length[k])==0);
+		k++;
+    }
+    return(found);
+}
+
+int
+is_quote_term(s,w)
+char *s;
+char *w;
+{
+    /* 1 when label s is listed in Quote_term[] and word w is one of the */
+    /* quote-like tokens ' " or / ; 0 otherwise                          */
+    int k;
+
+    // Djame : Arabic word contain quote
+    if (strcmp(w,"'")!=0 && strcmp(w,"\"")!=0 && strcmp(w,"/")!=0)
+		return(0);
+    for(k=0;k<Quote_term_n;k++){
+		if(strcmp(s,Quote_term[k])==0)
+			return(1);
+    }
+    return(0);
+}
+
+
+/*---------------*/
+/* compare words */
+/*---------------*/
+int
+word_comp(s1,s2)
+char *s1,*s2;
+{
+    /* Returns 1 when the two words are identical or are listed together */
+    /* (in either order) in EQ_word[]; returns 0 otherwise.              */
+    int k;
+    int straight, swapped;
+
+    if(strcmp(s1,s2)!=0){
+		for(k=0;k<EQ_word_n;k++){
+			straight = (strcmp(s1,EQ_word[k].s1)==0 && strcmp(s2,EQ_word[k].s2)==0);
+			swapped  = (strcmp(s1,EQ_word[k].s2)==0 && strcmp(s2,EQ_word[k].s1)==0);
+			if(straight || swapped){
+				return(1);
+			}
+		}
+		return(0);
+    }
+    return(1);
+}
+
+/*----------------*/
+/* compare labels */
+/*----------------*/
+int
+label_comp(s1,s2)
+char *s1,*s2;
+{
+    /* Returns 1 when the labels are equal after trimming, or are listed  */
+    /* together (in either order) in EQ_label[]; returns 0 otherwise.     */
+    /* Both arguments are trimmed IN PLACE by modify_label() first.       */
+    int k;
+    int straight, swapped;
+
+    modify_label(s1); // djame: added for spmrl 2013 so pos tag got filtered too
+    modify_label(s2); // djame
+    if(strcmp(s1,s2)!=0){
+		for(k=0;k<EQ_label_n;k++){
+			straight = (strcmp(s1,EQ_label[k].s1)==0 && strcmp(s2,EQ_label[k].s2)==0);
+			swapped  = (strcmp(s1,EQ_label[k].s2)==0 && strcmp(s2,EQ_label[k].s1)==0);
+			if(straight || swapped){
+				return(1);
+			}
+		}
+		return(0);
+    }
+    return(1);
+}
+
+
+/*------------------------------------------------------------------*/
+/* errors: flag the sentence (Status=1), print "<Line> : <message>" */
+/* on stderr and exit(1) once more than Max_error came before it    */
+void
+Error(s,arg1,arg2,arg3)
+char *s, *arg1, *arg2, *arg3;
+{
+    int earlier = Error_count;  /* errors reported before this one */
+    Error_count = earlier + 1;
+    Status = 1;
+    fprintf(stderr,"%d : ",Line);
+    fprintf(stderr,s,arg1,arg2,arg3);
+    if(earlier>Max_error) exit(1);
+}
+
+
+/*---------------------*/
+/* fatal error to exit */
+/*---------------------*/
+void
+Fatal(s,arg1,arg2,arg3)
+char *s, *arg1, *arg2, *arg3;
+{
+    fprintf(stderr,s,arg1,arg2,arg3);
+    exit(1);
+}
+
+
+/*----------------------------------------------*/
+/* Usage: write the command-line help to stderr */
+/*----------------------------------------------*/
+void
+Usage()
+{
+	fputs(" evalb [-dDh][-c n][-e n][-p param_file] gold-file test-file  \n",stderr);
+	fputs("                                                         \n",stderr);
+	fputs("    Evaluate bracketing in test-file against gold-file.  \n",stderr);
+	fputs("    Return recall, precision, F-Measure, tag accuracy.              \n",stderr);
+	fputs("                                                         \n",stderr);
+	fputs("  <option>                                               \n",stderr);
+	fputs("    -d             debug mode                            \n",stderr);
+	fputs("    -D             debug mode plus bracketing info       \n",stderr);
+	fputs("    -c n           cut-off length forstatistics (def.=40)\n",stderr);
+	fputs("    -e n           number of error to kill (default=10)  \n",stderr);
+	fputs("    -p param_file  parameter file                        \n",stderr);
+	fputs("    -K n		   Evaluate up to  n sentences                         \n",stderr);
+	fputs("    -X			   Count skipped sentences brackets as not parsed  \n",stderr);
+	fputs("    -L			   Compact view (for use in batch mode, all sentences  \n",stderr);
+	fputs("    -h    help                                           \n",stderr);
+}
diff --git a/parsing/EVALB_SPMRL/spmrl.prm b/parsing/EVALB_SPMRL/spmrl.prm
new file mode 100644
index 0000000000000000000000000000000000000000..610deb3371a0c15fd58f48fb17d8f90becd2b2fc
--- /dev/null
+++ b/parsing/EVALB_SPMRL/spmrl.prm
@@ -0,0 +1,91 @@
+##------------------------------------------##
+## Debug mode                               ##
+##   0: No debugging                        ##
+##   1: print data for individual sentence  ##
+##   2: print detailed bracketing info      ##
+##------------------------------------------##
+DEBUG 0
+
+##------------------------------------------##
+## MAX error                                ##
+##    Number of errors to stop the process. ##
+##    This is useful if there could be      ##
+##    tokenization error.                   ##
+##    The process will stop when this number##
+##    of errors are accumulated.            ##
+##------------------------------------------##
+MAX_ERROR 10000
+
+##------------------------------------------##
+## Cut-off length for statistics            ##
+##    At the end of evaluation, the         ##
+##    statistics for the sentences of length##
+##    less than or equal to this number will##
+##    be shown, on top of the statistics    ##
+##    for all the sentences                 ##
+##------------------------------------------##
+CUTOFF_LEN 70
+
+##------------------------------------------##
+## unlabeled or labeled bracketing          ##
+##    0: unlabeled bracketing               ##
+##    1: labeled bracketing                 ##
+##------------------------------------------##
+LABELED 1
+
+##------------------------------------------##
+## Delete labels                            ##
+##    list of labels to be ignored.         ##
+##    If it is a pre-terminal label, delete ##
+##    the word along with the brackets.     ##
+##    If it is a non-terminal label, just   ##
+##    delete the brackets (don't delete     ##
+##    children).                            ##
+##------------------------------------------##
+DELETE_LABEL TOP
+DELETE_LABEL ROOT
+DELETE_LABEL S1  
+DELETE_LABEL -NONE-
+DELETE_LABEL VROOT 
+
+#DELETE_LABEL ,
+#DELETE_LABEL :
+#DELETE_LABEL ``
+#DELETE_LABEL ''
+#DELETE_LABEL .
+#DELETE_LABEL ?
+#DELETE_LABEL !
+#DELETE_LABEL PONCT
+
+##------------------------------------------##
+## Delete labels for length calculation     ##
+##    list of labels to be ignored for      ##
+##    length calculation purpose            ##
+##------------------------------------------##
+DELETE_LABEL_FOR_LENGTH -NONE-
+
+##------------------------------------------##
+## Labels to be considered for misquote     ##
+##    (could be possesive or quote)         ##
+##------------------------------------------##
+#QUOTE_LABEL ``
+#QUOTE_LABEL ''
+#QUOTE_LABEL POS
+
+##------------------------------------------##
+## These ones are less common, but          ##
+##    are on occasion output by parsers:    ##      
+##------------------------------------------##
+#QUOTE_LABEL NN
+#QUOTE_LABEL CD
+#QUOTE_LABEL VBZ
+#QUOTE_LABEL :
+
+##------------------------------------------##
+## Equivalent labels, words                 ##
+##     the pairs are considered equivalent  ##
+##     This is non-directional.             ##
+##------------------------------------------##
+#EQ_LABEL ADVP PRT
+
+# EQ_WORD  Example example
diff --git a/parsing/EVALB_SPMRL/spmrl_hebrew.prm b/parsing/EVALB_SPMRL/spmrl_hebrew.prm
new file mode 100644
index 0000000000000000000000000000000000000000..d02323fc3067120aeeb825593767f8700bdad476
--- /dev/null
+++ b/parsing/EVALB_SPMRL/spmrl_hebrew.prm
@@ -0,0 +1,118 @@
+##------------------------------------------##
+## Debug mode                               ##
+##   0: No debugging                        ##
+##   1: print data for individual sentence  ##
+##   2: print detailed bracketing info      ##
+##------------------------------------------##
+DEBUG 0
+
+##------------------------------------------##
+## MAX error                                ##
+##    Number of errors to stop the process. ##
+##    This is useful if there could be      ##
+##    tokenization error.                   ##
+##    The process will stop when this number##
+##    of errors are accumulated.            ##
+##------------------------------------------##
+MAX_ERROR 10000
+
+##------------------------------------------##
+## Cut-off length for statistics            ##
+##    At the end of evaluation, the         ##
+##    statistics for the sentences of length##
+##    less than or equal to this number will##
+##    be shown, on top of the statistics    ##
+##    for all the sentences                 ##
+##------------------------------------------##
+CUTOFF_LEN 40
+
+##------------------------------------------##
+## unlabeled or labeled bracketing          ##
+##    0: unlabeled bracketing               ##
+##    1: labeled bracketing                 ##
+##------------------------------------------##
+LABELED 1
+
+##------------------------------------------##
+## Delete labels                            ##
+##    list of labels to be ignored.         ##
+##    If it is a pre-terminal label, delete ##
+##    the word along with the brackets.     ##
+##    If it is a non-terminal label, just   ##
+##    delete the brackets (don't delete     ##
+##    children).                            ##
+##------------------------------------------##
+DELETE_LABEL TOP
+DELETE_LABEL ROOT
+DELETE_LABEL S1  
+DELETE_LABEL -NONE-
+DELETE_LABEL VROOT 
+#DELETE_LABEL SENT
+
+#DELETE_LABEL ,
+#DELETE_LABEL :
+#DELETE_LABEL ``
+#DELETE_LABEL ''
+#DELETE_LABEL .
+#DELETE_LABEL ?
+#DELETE_LABEL !
+#DELETE_LABEL PONCT
+
+##------------------------------------------##
+## Delete labels for length calculation     ##
+##    list of labels to be ignored for      ##
+##    length calculation purpose            ##
+##------------------------------------------##
+DELETE_LABEL_FOR_LENGTH -NONE-
+
+##------------------------------------------##
+## Labels to be considered for misquote     ##
+##    (could be possessive or quote)        ##
+##------------------------------------------##
+#QUOTE_LABEL ``
+#QUOTE_LABEL ''
+#QUOTE_LABEL POS
+
+##------------------------------------------##
+## These ones are less common, but          ##
+##    are on occasion output by parsers:    ##      
+##------------------------------------------##
+#QUOTE_LABEL NN
+#QUOTE_LABEL CD
+#QUOTE_LABEL VBZ
+#QUOTE_LABEL :
+
+##------------------------------------------##
+## Equivalent labels, words                 ##
+##     the pairs are considered equivalent  ##
+##     This is non-directional.             ##
+##------------------------------------------##
+#EQ_LABEL ADVP PRT
+
+# EQ_WORD  Example example
+DELETE_LABEL SYN_NN
+DELETE_LABEL SYN_NNP
+DELETE_LABEL SYN_NNT
+DELETE_LABEL SYN_PRP
+DELETE_LABEL SYN_JJ
+DELETE_LABEL SYN_JJT
+DELETE_LABEL SYN_RB
+DELETE_LABEL SYN_RBR
+DELETE_LABEL SYN_MOD
+DELETE_LABEL SYN_VB
+DELETE_LABEL SYN_AUX
+DELETE_LABEL SYN_AGR
+DELETE_LABEL SYN_IN
+DELETE_LABEL SYN_COM
+DELETE_LABEL SYN_REL
+DELETE_LABEL SYN_CC
+DELETE_LABEL SYN_QW
+DELETE_LABEL SYN_HAM
+DELETE_LABEL SYN_WDT
+DELETE_LABEL SYN_DT
+DELETE_LABEL SYN_CD
+DELETE_LABEL SYN_CDT
+DELETE_LABEL SYN_AT
+DELETE_LABEL SYN_H
+DELETE_LABEL SYN_FL
+DELETE_LABEL SYN_ZVL
diff --git a/parsing/setup.py b/parsing/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbef1fde1d06a804c71f91816abc573b284f740b
--- /dev/null
+++ b/parsing/setup.py
@@ -0,0 +1,42 @@
+import setuptools
+import sys
+
+
+# Packaging script for the "benepar" distribution; sources live under src/.
+
+# Read the long description with a context manager so the README file handle
+# is closed deterministically instead of being left to the garbage collector.
+with open("README.md", "r", encoding="utf-8") as readme_file:
+    long_description = readme_file.read()
+
+setuptools.setup(
+    name="benepar",
+    version="0.2.0",
+    author="Nikita Kitaev",
+    author_email="kitaev@cs.berkeley.edu",
+    description="Berkeley Neural Parser",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/nikitakit/self-attentive-parser",
+    package_dir={"": "src"},
+    packages=setuptools.find_packages("src"),
+    python_requires=">=3.6",
+    # setuptools documents `classifiers` as a list of strings; passing a tuple
+    # triggers a "should be a list" warning, so a list is used here.
+    classifiers=[
+        "Intended Audience :: Developers",
+        "Intended Audience :: Education",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "Topic :: Text Processing :: Linguistic",
+    ],
+    install_requires=[
+        "nltk>=3.2",
+        "spacy>=2.0.9",
+        "torch>=1.6.0",
+        "torch-struct>=0.5",
+        "tokenizers>=0.9.4",
+        "transformers[torch,tokenizers]>=4.2.2",
+        "protobuf",
+        "sentencepiece>=0.1.91",
+        "dataclasses;python_version<'3.7'",
+    ],
+)
diff --git a/parsing/src/__pycache__/evaluate.cpython-310.pyc b/parsing/src/__pycache__/evaluate.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..515329d4ca5b6bc69936a57a39b54c96d4a4945c
Binary files /dev/null and b/parsing/src/__pycache__/evaluate.cpython-310.pyc differ
diff --git a/parsing/src/__pycache__/evaluate.cpython-37.pyc b/parsing/src/__pycache__/evaluate.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6ddc9a4ff8f02675dd15537926b903b776412033
Binary files /dev/null and b/parsing/src/__pycache__/evaluate.cpython-37.pyc differ
diff --git a/parsing/src/__pycache__/evaluate.cpython-38.pyc b/parsing/src/__pycache__/evaluate.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..347c81144b321bd2b92216d84e6bdcb5d6418f73
Binary files /dev/null and b/parsing/src/__pycache__/evaluate.cpython-38.pyc differ
diff --git a/parsing/src/__pycache__/learning_rates.cpython-310.pyc b/parsing/src/__pycache__/learning_rates.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..35b1e1331c8c35f83d50978fac5f8d46c546cf62
Binary files /dev/null and b/parsing/src/__pycache__/learning_rates.cpython-310.pyc differ
diff --git a/parsing/src/__pycache__/learning_rates.cpython-37.pyc b/parsing/src/__pycache__/learning_rates.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8d070d35d58fc8e0d66d4acc45368f1c70c2aec8
Binary files /dev/null and b/parsing/src/__pycache__/learning_rates.cpython-37.pyc differ
diff --git a/parsing/src/__pycache__/learning_rates.cpython-38.pyc b/parsing/src/__pycache__/learning_rates.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..99ccd0ebb12d39feaa29b565e2507eb24adb34ae
Binary files /dev/null and b/parsing/src/__pycache__/learning_rates.cpython-38.pyc differ
diff --git a/parsing/src/__pycache__/parse.cpython-38.pyc b/parsing/src/__pycache__/parse.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1538bb1c3f703ef8f5de7b5d60c9d2ef6a7cf75c
Binary files /dev/null and b/parsing/src/__pycache__/parse.cpython-38.pyc differ
diff --git a/parsing/src/__pycache__/transliterate.cpython-310.pyc b/parsing/src/__pycache__/transliterate.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ad0c522bdc45d26b136829452a84a81cc234b0d2
Binary files /dev/null and b/parsing/src/__pycache__/transliterate.cpython-310.pyc differ
diff --git a/parsing/src/__pycache__/transliterate.cpython-37.pyc b/parsing/src/__pycache__/transliterate.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cad5da7e0787e576040443f3c0bc85ca6ca1da45
Binary files /dev/null and b/parsing/src/__pycache__/transliterate.cpython-37.pyc differ
diff --git a/parsing/src/__pycache__/transliterate.cpython-38.pyc b/parsing/src/__pycache__/transliterate.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b958599341fa33f438ede9f6f3ed169637f1d3f6
Binary files /dev/null and b/parsing/src/__pycache__/transliterate.cpython-38.pyc differ
diff --git a/parsing/src/__pycache__/treebanks.cpython-310.pyc b/parsing/src/__pycache__/treebanks.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1e2a8ff1b30628bcb3c0081e57ede7992d82ac76
Binary files /dev/null and b/parsing/src/__pycache__/treebanks.cpython-310.pyc differ
diff --git a/parsing/src/__pycache__/treebanks.cpython-37.pyc b/parsing/src/__pycache__/treebanks.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8605910ed0e1fd4db971366751833794c335c7d8
Binary files /dev/null and b/parsing/src/__pycache__/treebanks.cpython-37.pyc differ
diff --git a/parsing/src/__pycache__/treebanks.cpython-38.pyc b/parsing/src/__pycache__/treebanks.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e44b1e195a163ca01180f230ef1a44e1fd659752
Binary files /dev/null and b/parsing/src/__pycache__/treebanks.cpython-38.pyc differ
diff --git a/parsing/src/benepar/__init__.py b/parsing/src/benepar/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d6ad660648d0c407a0cda608e0e5fc027ca33c5
--- /dev/null
+++ b/parsing/src/benepar/__init__.py
@@ -0,0 +1,20 @@
+"""
+benepar: Berkeley Neural Parser
+"""
+
+# This file and all code in integrations/ relate to the version of the parser
+# released via PyPI. If you only need to run research experiments, it is safe
+# to delete the integrations/ folder and replace this __init__.py with an
+# empty file.
+
+# Public API of the package; each name listed here is bound by one of the
+# `from .integrations... import ...` statements in this module.
+__all__ = [
+    "Parser",
+    "InputSentence",
+    "download",
+    "BeneparComponent",
+    "NonConstituentException",
+]
+
+from .integrations.downloader import download
+from .integrations.nltk_plugin import Parser, InputSentence
+from .integrations.spacy_plugin import BeneparComponent, NonConstituentException
diff --git a/parsing/src/benepar/__pycache__/__init__.cpython-310.pyc b/parsing/src/benepar/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..45a19cfab4c95912135340975ee7c80d9e6653c9
Binary files /dev/null and b/parsing/src/benepar/__pycache__/__init__.cpython-310.pyc differ
diff --git a/parsing/src/benepar/__pycache__/__init__.cpython-37.pyc b/parsing/src/benepar/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4d77d11129d29385632095630e86529454267644
Binary files /dev/null and b/parsing/src/benepar/__pycache__/__init__.cpython-37.pyc differ
diff --git a/parsing/src/benepar/__pycache__/__init__.cpython-38.pyc b/parsing/src/benepar/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4b3c22ea33d82e0c7046d3170fad8b6baa533b72
Binary files /dev/null and b/parsing/src/benepar/__pycache__/__init__.cpython-38.pyc differ
diff --git a/parsing/src/benepar/__pycache__/char_lstm.cpython-310.pyc b/parsing/src/benepar/__pycache__/char_lstm.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dc3c7ea69a2b62715f0b44ad8ebb01a9b76a1072
Binary files /dev/null and b/parsing/src/benepar/__pycache__/char_lstm.cpython-310.pyc differ
diff --git a/parsing/src/benepar/__pycache__/char_lstm.cpython-37.pyc b/parsing/src/benepar/__pycache__/char_lstm.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..660cc6476b5690ff29bf5f7529664bec9db83385
Binary files /dev/null and b/parsing/src/benepar/__pycache__/char_lstm.cpython-37.pyc differ
diff --git a/parsing/src/benepar/__pycache__/char_lstm.cpython-38.pyc b/parsing/src/benepar/__pycache__/char_lstm.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7c8f2c0905e28289d20d27613d0185876308cddd
Binary files /dev/null and b/parsing/src/benepar/__pycache__/char_lstm.cpython-38.pyc differ
diff --git a/parsing/src/benepar/__pycache__/decode_chart.cpython-310.pyc b/parsing/src/benepar/__pycache__/decode_chart.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3e3431b0d230bd82050ba9f408cb7cc8eddfaea1
Binary files /dev/null and b/parsing/src/benepar/__pycache__/decode_chart.cpython-310.pyc differ
diff --git a/parsing/src/benepar/__pycache__/decode_chart.cpython-37.pyc b/parsing/src/benepar/__pycache__/decode_chart.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..58c7245b3c10c4ac4a03e513b40e702e54bf6dd8
Binary files /dev/null and b/parsing/src/benepar/__pycache__/decode_chart.cpython-37.pyc differ
diff --git a/parsing/src/benepar/__pycache__/decode_chart.cpython-38.pyc b/parsing/src/benepar/__pycache__/decode_chart.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..049d3cf8367914407eeb686f38552ef41a107f82
Binary files /dev/null and b/parsing/src/benepar/__pycache__/decode_chart.cpython-38.pyc differ
diff --git a/parsing/src/benepar/__pycache__/nkutil.cpython-310.pyc b/parsing/src/benepar/__pycache__/nkutil.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a4e63dc023c3ea8621d9c30a145aa344c429e029
Binary files /dev/null and b/parsing/src/benepar/__pycache__/nkutil.cpython-310.pyc differ
diff --git a/parsing/src/benepar/__pycache__/nkutil.cpython-37.pyc b/parsing/src/benepar/__pycache__/nkutil.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2db77757aaf4cf3c9d6fc39ef488f8ceae44e031
Binary files /dev/null and b/parsing/src/benepar/__pycache__/nkutil.cpython-37.pyc differ
diff --git a/parsing/src/benepar/__pycache__/nkutil.cpython-38.pyc b/parsing/src/benepar/__pycache__/nkutil.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..268ddf1ef105e516e011f659ab47e9e4f3a8da26
Binary files /dev/null and b/parsing/src/benepar/__pycache__/nkutil.cpython-38.pyc differ
diff --git a/parsing/src/benepar/__pycache__/parse_base.cpython-310.pyc b/parsing/src/benepar/__pycache__/parse_base.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4c85df1217db38b533bbe05dd804240c97ed2f73
Binary files /dev/null and b/parsing/src/benepar/__pycache__/parse_base.cpython-310.pyc differ
diff --git a/parsing/src/benepar/__pycache__/parse_base.cpython-37.pyc b/parsing/src/benepar/__pycache__/parse_base.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f2011e6e6e6c9f547d20e43d5f496ebfe29a48a8
Binary files /dev/null and b/parsing/src/benepar/__pycache__/parse_base.cpython-37.pyc differ
diff --git a/parsing/src/benepar/__pycache__/parse_base.cpython-38.pyc b/parsing/src/benepar/__pycache__/parse_base.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b4e671acbb199571f54ed2f294f4d83b8bc5418
Binary files /dev/null and b/parsing/src/benepar/__pycache__/parse_base.cpython-38.pyc differ
diff --git a/parsing/src/benepar/__pycache__/parse_chart.cpython-310.pyc b/parsing/src/benepar/__pycache__/parse_chart.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c508f6f3183df932a0201463dd700baae36b1972
Binary files /dev/null and b/parsing/src/benepar/__pycache__/parse_chart.cpython-310.pyc differ
diff --git a/parsing/src/benepar/__pycache__/parse_chart.cpython-37.pyc b/parsing/src/benepar/__pycache__/parse_chart.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..94b981d88bcbc5884b57712e367ce0caff0a2d8d
Binary files /dev/null and b/parsing/src/benepar/__pycache__/parse_chart.cpython-37.pyc differ
diff --git a/parsing/src/benepar/__pycache__/parse_chart.cpython-38.pyc b/parsing/src/benepar/__pycache__/parse_chart.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..06f9f7da11c32af739c9c11b0e7946263417e741
Binary files /dev/null and b/parsing/src/benepar/__pycache__/parse_chart.cpython-38.pyc differ
diff --git a/parsing/src/benepar/__pycache__/partitioned_transformer.cpython-310.pyc b/parsing/src/benepar/__pycache__/partitioned_transformer.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6db3303e19ba14903564c9461db9844040a9bb11
Binary files /dev/null and b/parsing/src/benepar/__pycache__/partitioned_transformer.cpython-310.pyc differ
diff --git a/parsing/src/benepar/__pycache__/partitioned_transformer.cpython-37.pyc b/parsing/src/benepar/__pycache__/partitioned_transformer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..58454aa626b29a0d715096a0a094576700177d01
Binary files /dev/null and b/parsing/src/benepar/__pycache__/partitioned_transformer.cpython-37.pyc differ
diff --git a/parsing/src/benepar/__pycache__/partitioned_transformer.cpython-38.pyc b/parsing/src/benepar/__pycache__/partitioned_transformer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e9fa54ea354af2b241a1abf6604fe2d88a5f7b7d
Binary files /dev/null and b/parsing/src/benepar/__pycache__/partitioned_transformer.cpython-38.pyc differ
diff --git a/parsing/src/benepar/__pycache__/ptb_unescape.cpython-310.pyc b/parsing/src/benepar/__pycache__/ptb_unescape.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f10e64bc86bcbbad136506cf917bda48c0dee1aa
Binary files /dev/null and b/parsing/src/benepar/__pycache__/ptb_unescape.cpython-310.pyc differ
diff --git a/parsing/src/benepar/__pycache__/ptb_unescape.cpython-37.pyc b/parsing/src/benepar/__pycache__/ptb_unescape.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5d6b52653a2dacc6450a2ebd7e69dd9d0f5d9423
Binary files /dev/null and b/parsing/src/benepar/__pycache__/ptb_unescape.cpython-37.pyc differ
diff --git a/parsing/src/benepar/__pycache__/ptb_unescape.cpython-38.pyc b/parsing/src/benepar/__pycache__/ptb_unescape.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6930264225736838d69ee94f92a951aac73ea499
Binary files /dev/null and b/parsing/src/benepar/__pycache__/ptb_unescape.cpython-38.pyc differ
diff --git a/parsing/src/benepar/__pycache__/retokenization.cpython-310.pyc b/parsing/src/benepar/__pycache__/retokenization.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..143dbcadd97f9b68aefc3f0d8d1f808d17fd85f6
Binary files /dev/null and b/parsing/src/benepar/__pycache__/retokenization.cpython-310.pyc differ
diff --git a/parsing/src/benepar/__pycache__/retokenization.cpython-37.pyc b/parsing/src/benepar/__pycache__/retokenization.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e5ac8095a1a2de0062fd5b84d767b411ae15a9fd
Binary files /dev/null and b/parsing/src/benepar/__pycache__/retokenization.cpython-37.pyc differ
diff --git a/parsing/src/benepar/__pycache__/retokenization.cpython-38.pyc b/parsing/src/benepar/__pycache__/retokenization.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..77bc038d3c2d99167804e5df13287f771f0b218d
Binary files /dev/null and b/parsing/src/benepar/__pycache__/retokenization.cpython-38.pyc differ
diff --git a/parsing/src/benepar/__pycache__/subbatching.cpython-310.pyc b/parsing/src/benepar/__pycache__/subbatching.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f9d8eceffd6ae995f21f1b4d9c2bb095b3a5c741
Binary files /dev/null and b/parsing/src/benepar/__pycache__/subbatching.cpython-310.pyc differ
diff --git a/parsing/src/benepar/__pycache__/subbatching.cpython-37.pyc b/parsing/src/benepar/__pycache__/subbatching.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f421b80679299e0a14e40db0572f5d3e5c560319
Binary files /dev/null and b/parsing/src/benepar/__pycache__/subbatching.cpython-37.pyc differ
diff --git a/parsing/src/benepar/__pycache__/subbatching.cpython-38.pyc b/parsing/src/benepar/__pycache__/subbatching.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c727114db8c68934266d6c296e91ba82498566fb
Binary files /dev/null and b/parsing/src/benepar/__pycache__/subbatching.cpython-38.pyc differ
diff --git a/parsing/src/benepar/char_lstm.py b/parsing/src/benepar/char_lstm.py
new file mode 100644
index 0000000000000000000000000000000000000000..0aefc5c18959865e9a75cbb476b21e0d2afd5678
--- /dev/null
+++ b/parsing/src/benepar/char_lstm.py
@@ -0,0 +1,160 @@
+"""
+Character LSTM implementation (matches https://arxiv.org/pdf/1805.01052.pdf)
+"""
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class CharacterLSTM(nn.Module):
+    """Bidirectional character LSTM producing one vector per word.
+
+    Args:
+        num_embeddings: number of rows in the character embedding table.
+        d_embedding: size of each character embedding.
+        d_out: size of the per-word output; each LSTM direction gets
+            ``d_out // 2`` hidden units.
+        char_dropout: dropout probability applied to character embeddings.
+        **kwargs: passed through unchanged to ``nn.Embedding``.
+    """
+
+    def __init__(self, num_embeddings, d_embedding, d_out, char_dropout=0.0, **kwargs):
+        super().__init__()
+
+        self.d_embedding = d_embedding
+        self.d_out = d_out
+
+        # The LSTM is constructed before the embedding table; keeping this
+        # order keeps seeded parameter initialisation reproducible.
+        hidden_per_direction = self.d_out // 2
+        self.lstm = nn.LSTM(
+            self.d_embedding, hidden_per_direction, num_layers=1, bidirectional=True
+        )
+        self.emb = nn.Embedding(num_embeddings, self.d_embedding, **kwargs)
+        self.char_dropout = nn.Dropout(char_dropout)
+
+    def forward(self, chars_packed, valid_token_mask):
+        """Run the LSTM over packed character ids and scatter results by mask.
+
+        Args:
+            chars_packed: a ``PackedSequence`` whose ``data`` holds character
+                ids; its batch/sort metadata is reused for the embeddings.
+            valid_token_mask: boolean mask whose first two dimensions give the
+                output layout; true positions receive the word vectors.
+
+        Returns:
+            A tensor of shape ``(mask.shape[0], mask.shape[1], features)`` that
+            is zero wherever the mask is false.
+        """
+        embedded_chars = self.char_dropout(self.emb(chars_packed.data))
+        packed_embeddings = nn.utils.rnn.PackedSequence(
+            embedded_chars,
+            batch_sizes=chars_packed.batch_sizes,
+            sorted_indices=chars_packed.sorted_indices,
+            unsorted_indices=chars_packed.unsorted_indices,
+        )
+
+        # Only the final hidden state is used; index 0 / 1 are the two
+        # directions of the single bidirectional layer.
+        _, (final_hidden, _) = self.lstm(packed_embeddings)
+        word_vectors = torch.cat([final_hidden[0], final_hidden[1]], -1)
+
+        # Positions created by padding keep an all-zero dummy vector.
+        out_shape = (
+            valid_token_mask.shape[0],
+            valid_token_mask.shape[1],
+            word_vectors.shape[-1],
+        )
+        padded = word_vectors.new_zeros(out_shape)
+        padded[valid_token_mask] = word_vectors
+        return padded
+
+
+class RetokenizerForCharLSTM:
+    """Turn word lists into character-id arrays for a character LSTM.
+
+    Each sentence is encoded as one row per word plus a leading
+    start-of-sentence row and a trailing stop-of-sentence row. Every row is
+    wrapped in start-word / stop-word markers.
+    """
+
+    # Assumes that these control characters are not present in treebank text
+    CHAR_UNK = "\0"
+    CHAR_ID_UNK = 0
+    CHAR_START_SENTENCE = "\1"
+    CHAR_START_WORD = "\2"
+    CHAR_STOP_WORD = "\3"
+    CHAR_STOP_SENTENCE = "\4"
+
+    def __init__(self, char_vocab):
+        """Store the character -> id mapping used for encoding."""
+        self.char_vocab = char_vocab
+
+    @classmethod
+    def build_vocab(cls, sentences):
+        """Build a character -> id mapping from an iterable of sentences.
+
+        Args:
+            sentences: iterable of word sequences; a tuple element is treated
+                as ``(words, ...)`` and only its first item is used.
+
+        Returns:
+            A dict mapping characters to integer ids. With no characters at
+            all, the 256-entry codepoint-indexed vocabulary is returned.
+        """
+        char_set = set()
+        for sentence in sentences:
+            if isinstance(sentence, tuple):
+                sentence = sentence[0]
+            for word in sentence:
+                char_set |= set(word)
+
+        # If codepoints are small (e.g. Latin alphabet), index by codepoint
+        # directly. `default=0` keeps an empty corpus from raising.
+        highest_codepoint = max((ord(char) for char in char_set), default=0)
+        if highest_codepoint < 512:
+            if highest_codepoint < 256:
+                highest_codepoint = 256
+            else:
+                highest_codepoint = 512
+
+            char_vocab = {}
+            # This also takes care of constants like CHAR_UNK, etc.
+            for codepoint in range(highest_codepoint):
+                char_vocab[chr(codepoint)] = codepoint
+            return char_vocab
+        else:
+            char_vocab = {}
+            char_vocab[cls.CHAR_UNK] = 0
+            char_vocab[cls.CHAR_START_SENTENCE] = 1
+            char_vocab[cls.CHAR_START_WORD] = 2
+            char_vocab[cls.CHAR_STOP_WORD] = 3
+            char_vocab[cls.CHAR_STOP_SENTENCE] = 4
+            for id_, char in enumerate(sorted(char_set), start=5):
+                char_vocab[char] = id_
+            return char_vocab
+
+    def __call__(self, words, space_after="ignored", return_tensors=None):
+        """Encode one sentence as numpy character-id arrays.
+
+        Args:
+            words: sized sequence of word strings (may be empty, and may
+                contain empty strings).
+            space_after: accepted but not used.
+            return_tensors: must be ``"np"``.
+
+        Returns:
+            A dict with ``char_ids`` of shape ``(len(words) + 2, max_len)``,
+            ``word_lens`` (number of ids used in each row) and
+            ``valid_token_mask`` (all true, one entry per row).
+
+        Raises:
+            NotImplementedError: if ``return_tensors`` is not ``"np"``.
+        """
+        if return_tensors != "np":
+            raise NotImplementedError("Only return_tensors='np' is supported.")
+
+        res = {}
+
+        start_word = self.char_vocab[self.CHAR_START_WORD]
+        stop_word = self.char_vocab[self.CHAR_STOP_WORD]
+        start_sentence = self.char_vocab[self.CHAR_START_SENTENCE]
+        stop_sentence = self.char_vocab[self.CHAR_STOP_SENTENCE]
+
+        # Sentence-level start/stop tokens are encoded as 3 pseudo-chars
+        # Within each word, account for 2 start/stop characters
+        num_rows = len(words) + 2
+        longest_word = max((len(word) for word in words), default=0)
+        max_word_len = max(3, longest_word) + 2
+        char_ids = np.zeros((num_rows, max_word_len), dtype=int)
+        word_lens = np.zeros(num_rows, dtype=int)
+
+        char_ids[0, :5] = [
+            start_word,
+            start_sentence,
+            start_sentence,
+            start_sentence,
+            stop_word,
+        ]
+        word_lens[0] = 5
+        for i, word in enumerate(words, start=1):
+            char_ids[i, 0] = start_word
+            for j, char in enumerate(word, start=1):
+                char_ids[i, j] = self.char_vocab.get(char, self.CHAR_ID_UNK)
+            # Position the stop marker from the word's own length rather than
+            # the leaked loop variable, which is stale (or unbound) when the
+            # word is empty.
+            stop_position = len(word) + 1
+            char_ids[i, stop_position] = stop_word
+            word_lens[i] = stop_position + 1
+        # The last row is addressed directly so an empty `words` still works.
+        char_ids[num_rows - 1, :5] = [
+            start_word,
+            stop_sentence,
+            stop_sentence,
+            stop_sentence,
+            stop_word,
+        ]
+        word_lens[num_rows - 1] = 5
+
+        res["char_ids"] = char_ids
+        res["word_lens"] = word_lens
+        res["valid_token_mask"] = np.ones_like(word_lens, dtype=bool)
+
+        return res
+
+    def pad(self, examples, return_tensors=None):
+        """Collate encoded sentences into a batch of PyTorch inputs.
+
+        Args:
+            examples: list of dicts as produced by ``__call__``.
+            return_tensors: must be ``"pt"``.
+
+        Returns:
+            A dict with ``char_ids`` (a ``PackedSequence`` over all rows of
+            all examples) and ``valid_token_mask`` (examples padded with
+            ``False`` to a common number of rows).
+
+        Raises:
+            NotImplementedError: if ``return_tensors`` is not ``"pt"``.
+        """
+        if return_tensors != "pt":
+            raise NotImplementedError("Only return_tensors='pt' is supported.")
+        max_word_len = max(example["char_ids"].shape[-1] for example in examples)
+        # Right-pad every example to the widest row, then stack all rows.
+        char_ids = torch.cat(
+            [
+                F.pad(
+                    torch.tensor(example["char_ids"]),
+                    (0, max_word_len - example["char_ids"].shape[-1]),
+                )
+                for example in examples
+            ]
+        )
+        word_lens = torch.cat(
+            [torch.tensor(example["word_lens"]) for example in examples]
+        )
+        valid_token_mask = nn.utils.rnn.pad_sequence(
+            [torch.tensor(example["valid_token_mask"]) for example in examples],
+            batch_first=True,
+            padding_value=False,
+        )
+
+        char_ids = nn.utils.rnn.pack_padded_sequence(
+            char_ids, word_lens, batch_first=True, enforce_sorted=False
+        )
+        return {
+            "char_ids": char_ids,
+            "valid_token_mask": valid_token_mask,
+        }
diff --git a/parsing/src/benepar/decode_chart.py b/parsing/src/benepar/decode_chart.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d32ed1bdbe3bef17f509ceffdd1138267a36b0e
--- /dev/null
+++ b/parsing/src/benepar/decode_chart.py
@@ -0,0 +1,291 @@
+"""
+Parsing formulated as span classification (https://arxiv.org/abs/1705.03919)
+"""
+
+import nltk
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch_struct
+
+from .parse_base import CompressedParserOutput
+
+
+def pad_charts(charts, padding_value=-100):
+    """Stack square charts of differing sizes into one padded tensor.
+
+    Each chart is copied into the top-left corner of its slice; every other
+    cell holds `padding_value`. Dtype and device follow the first chart.
+    """
+    sizes = [chart.shape[0] for chart in charts]
+    side = max(sizes)
+    reference = charts[0]
+    stacked = torch.full(
+        (len(charts), side, side),
+        padding_value,
+        dtype=reference.dtype,
+        device=reference.device,
+    )
+    for row, (chart, size) in enumerate(zip(charts, sizes)):
+        stacked[row, :size, :size] = chart
+    return stacked
+
+
+def collapse_unary_strip_pos(tree, strip_top=True):
+    """Collapse unary chains and strip part of speech tags.
+
+    Unary chains are merged with "::" as the label separator. A root labelled
+    TOP / ROOT / S1 / VROOT is then either removed (`strip_top=True`; its label
+    is blanked instead when it has several children) or, when it has exactly
+    one child, folded into that child's label.
+    """
+
+    def without_pos(node):
+        # A node with a single string child is a POS tag: keep only the word.
+        if len(node) == 1 and isinstance(node[0], str):
+            return node[0]
+        return nltk.tree.Tree(node.label(), [without_pos(kid) for kid in node])
+
+    root = without_pos(tree)
+    root.collapse_unary(collapsePOS=True, joinChar="::")
+
+    if root.label() not in ("TOP", "ROOT", "S1", "VROOT"):
+        return root
+
+    has_single_child = len(root) == 1
+    if strip_top:
+        if has_single_child:
+            return root[0]
+        root.set_label("")
+        return root
+
+    if has_single_child:
+        only_child = root[0]
+        only_child.set_label(root.label() + "::" + only_child.label())
+        return only_child
+    return root
+
+
+def _get_labeled_spans(tree, spans_out, start):
+    """Append the labeled spans under `tree` to `spans_out`.
+
+    `start` is the index of the first word covered by `tree`; the return value
+    is the index one past the last covered word. Children are recorded before
+    their parent.
+    """
+    if isinstance(tree, str):
+        # A bare word occupies exactly one position.
+        return start + 1
+
+    assert not (
+        len(tree) <= 1 and not isinstance(tree[0], str)
+    ), "Must call collapse_unary_strip_pos first"
+
+    cursor = start
+    for subtree in tree:
+        cursor = _get_labeled_spans(subtree, spans_out, cursor)
+
+    # Spans are returned as closed intervals on both ends
+    last_word = cursor - 1
+    spans_out.append((start, last_word, tree.label()))
+    return cursor
+
+
+def get_labeled_spans(tree):
+    """Converts a tree into a list of labeled spans.
+
+    Args:
+        tree: an nltk.tree.Tree object
+
+    Returns:
+        A list of (span_start, span_end, span_label) tuples. The start and end
+        indices indicate the first and last words of the span (a closed
+        interval). Unary chains are collapsed with "::" as the separator, so
+        e.g. a (S (VP ...)) will result in a single span labeled "S::VP".
+        Spans of children appear before the span of their parent.
+    """
+    # Part-of-speech tags are stripped and the top-level wrapper is removed
+    # (strip_top defaults to True) before spans are collected.
+    tree = collapse_unary_strip_pos(tree)
+    spans_out = []
+    _get_labeled_spans(tree, spans_out, start=0)
+    return spans_out
+
+
+def uncollapse_unary(tree, ensure_top=False):
+    """Un-collapse unary chains.
+
+    A node whose label contains "::" is expanded into a chain of nested
+    single-child nodes, outermost label first. With `ensure_top`, a "TOP" node
+    is added above the given node unless its first label is already "TOP";
+    the flag is not passed down to children. Strings are returned unchanged.
+    """
+    if isinstance(tree, str):
+        return tree
+
+    chain = tree.label().split("::")
+    if ensure_top and chain[0] != "TOP":
+        chain = ["TOP"] + chain
+
+    wrapped = [uncollapse_unary(subtree) for subtree in tree]
+    # Build from the innermost label outwards.
+    for label in reversed(chain):
+        wrapped = [nltk.tree.Tree(label, wrapped)]
+    return wrapped[0]
+
+
+class ChartDecoder:
+    """A chart decoder for parsing formulated as span classification.
+
+    Charts are square integer arrays indexed as ``chart[start, end]`` with
+    inclusive word positions; each entry holds a label index.
+    """
+
+    def __init__(self, label_vocab, force_root_constituent=True):
+        """Constructs a new ChartDecoder object.
+        Args:
+            label_vocab: A mapping from span labels to integer indices.
+            force_root_constituent: When true, decoding never assigns label
+                index 0 to the span covering the whole sentence.
+        """
+        self.label_vocab = label_vocab
+        # Inverse mapping, used to turn predicted indices back into labels.
+        self.label_from_index = {i: label for label, i in label_vocab.items()}
+        self.force_root_constituent = force_root_constituent
+
+    @staticmethod
+    def build_vocab(trees):
+        """Builds a label vocabulary from an iterable of trees.
+
+        Returns:
+            A dict mapping the empty label to 0 and every non-empty span label
+            found by get_labeled_spans() to 1, 2, ... in sorted order.
+        """
+        label_set = set()
+        for tree in trees:
+            for _, _, label in get_labeled_spans(tree):
+                if label:
+                    label_set.add(label)
+        label_set = [""] + sorted(label_set)
+        return {label: i for i, label in enumerate(label_set)}
+    
+    @staticmethod
+    def infer_force_root_constituent(trees):
+        """Returns False if any span in `trees` has an empty label, else True."""
+        for tree in trees:
+            for _, _, label in get_labeled_spans(tree):
+                if not label:
+                    return False
+        return True
+
+    def chart_from_tree(self, tree):
+        """Converts a tree into an integer chart of label indices.
+
+        Returns:
+            A (num_words, num_words) array with -100 strictly below the
+            diagonal, the label index at [start, end] for each span whose
+            label is in the vocabulary, and 0 elsewhere.
+        """
+        spans = get_labeled_spans(tree)
+        num_words = len(tree.leaves())
+        chart = np.full((num_words, num_words), -100, dtype=int)
+        chart = np.tril(chart, -1)
+        # Now all invalid entries are filled with -100, and valid entries with 0
+        for start, end, label in spans:
+            # Previously unseen unary chains can occur in the dev/test sets.
+            # For now, we ignore them and don't mark the corresponding chart
+            # entry as a constituent.
+            if label in self.label_vocab:
+                chart[start, end] = self.label_vocab[label]
+        return chart
+
+    def charts_from_pytorch_scores_batched(self, scores, lengths):
+        """Runs CKY to recover span labels from scores (e.g. logits).
+
+        This method uses pytorch-struct to speed up decoding compared to the
+        pure-Python implementation of CKY used by tree_from_scores().
+
+        Args:
+            scores: a pytorch tensor of shape (batch size, max length,
+                max length, label vocab size).
+            lengths: a pytorch tensor of shape (batch size,)
+
+        Returns:
+            A list of numpy arrays, each of shape (sentence length, sentence
+                length).
+        """
+        scores = scores.detach()
+        # Shift scores so that label index 0 always scores exactly zero. This
+        # also creates a new tensor, so the in-place edit below does not touch
+        # the caller's scores.
+        scores = scores - scores[..., :1]
+        if self.force_root_constituent:
+            # Heavily penalise label 0 on the full-sentence span (0, length-1).
+            scores[torch.arange(scores.shape[0]), 0, lengths - 1, 0] -= 1e9
+        dist = torch_struct.TreeCRF(scores, lengths=lengths)
+        amax = dist.argmax
+        # NOTE(review): the small bump on the label-0 channel presumably makes
+        # cells not selected by the best tree resolve to index 0 in the argmax
+        # below -- confirm against torch_struct's argmax output.
+        amax[..., 0] += 1e-9
+        padded_charts = amax.argmax(-1)
+        padded_charts = padded_charts.detach().cpu().numpy()
+        # Trim each padded chart back to its own sentence length.
+        return [
+            chart[:length, :length] for chart, length in zip(padded_charts, lengths)
+        ]
+
+    def compressed_output_from_chart(self, chart):
+        """Converts a chart into a CompressedParserOutput.
+
+        Every diagonal cell and the [0, -1] cell are always included as spans,
+        together with every other nonzero cell of `chart`. Spans are ordered
+        by start position, with longer spans first among equal starts; ends
+        in the output are exclusive.
+        """
+        chart_with_filled_diagonal = chart.copy()
+        np.fill_diagonal(chart_with_filled_diagonal, 1)
+        chart_with_filled_diagonal[0, -1] = 1
+        # NOTE(review): any nonzero cell counts as a span here, including
+        # negative values such as -100; presumably callers pass decoded charts
+        # that contain no such entries -- verify against callers.
+        starts, inclusive_ends = np.where(chart_with_filled_diagonal)
+        # lexsort uses the last key as the primary key: start ascending, then
+        # end descending.
+        preorder_sort = np.lexsort((-inclusive_ends, starts))
+        starts = starts[preorder_sort]
+        inclusive_ends = inclusive_ends[preorder_sort]
+        # Labels come from the unmodified chart, not the copy marked above.
+        labels = chart[starts, inclusive_ends]
+        ends = inclusive_ends + 1
+        return CompressedParserOutput(starts=starts, ends=ends, labels=labels)
+
+    def tree_from_chart(self, chart, leaves):
+        """Builds a tree from a chart via compressed_output_from_chart()."""
+        compressed_output = self.compressed_output_from_chart(chart)
+        return compressed_output.to_tree(leaves, self.label_from_index)
+
+    def tree_from_scores(self, scores, leaves):
+        """Runs CKY to decode a tree from scores (e.g. logits).
+
+        If speed is important, consider using charts_from_pytorch_scores_batched
+        followed by compressed_output_from_chart or tree_from_chart instead.
+
+        Args:
+            scores: a chart of scores (or logits) of shape
+                (sentence length, sentence length, label vocab size). The first
+                two dimensions may be padded to a longer length, but all padded
+                values will be ignored.
+            leaves: the leaf nodes to use in the constructed tree. These
+                may be of type str or nltk.Tree, or (word, tag) tuples that
+                will be used to construct the leaf node objects.
+
+        Returns:
+            An nltk.Tree object.
+        """
+        leaves = [
+            nltk.Tree(node[1], [node[0]]) if isinstance(node, tuple) else node
+            for node in leaves
+        ]
+
+        # chart[left, right] -> (list of subtrees, best score) for the
+        # half-open word range [left, right).
+        chart = {}
+        # Shift scores so that label index 0 scores zero in every cell.
+        scores = scores - scores[:, :, 0, None]
+        # Fill the chart bottom-up, shortest spans first.
+        for length in range(1, len(leaves) + 1):
+            for left in range(0, len(leaves) + 1 - length):
+                right = left + length
+
+                # `scores` is indexed with an inclusive end position.
+                label_scores = scores[left, right - 1]
+                label_scores = label_scores - label_scores[0]
+
+                # For the full-sentence span, label 0 is excluded when a root
+                # constituent is forced.
+                argmax_label_index = int(
+                    label_scores.argmax()
+                    if length < len(leaves) or not self.force_root_constituent
+                    else label_scores[1:].argmax() + 1
+                )
+                argmax_label = self.label_from_index[argmax_label_index]
+                label = argmax_label
+                label_score = label_scores[argmax_label_index]
+
+                if length == 1:
+                    # Single-word span: wrap the leaf only if it got a label.
+                    tree = leaves[left]
+                    if label:
+                        tree = nltk.tree.Tree(label, [tree])
+                    chart[left, right] = [tree], label_score
+                    continue
+
+                # Pick the split point whose two halves score highest.
+                best_split = max(
+                    range(left + 1, right),
+                    key=lambda split: (chart[left, split][1] + chart[split, right][1]),
+                )
+
+                left_trees, left_score = chart[left, best_split]
+                right_trees, right_score = chart[best_split, right]
+
+                # An empty label adds no node: the children are passed up flat.
+                children = left_trees + right_trees
+                if label:
+                    children = [nltk.tree.Tree(label, children)]
+
+                chart[left, right] = (children, label_score + left_score + right_score)
+
+        children, score = chart[0, len(leaves)]
+        tree = nltk.tree.Tree("TOP", children)
+        # Expand "::"-joined labels back into unary chains.
+        tree = uncollapse_unary(tree)
+        return tree
+
+
+class SpanClassificationMarginLoss(nn.Module):
+    """Margin loss between the best-scoring tree and the gold span labels.
+
+    Args:
+        force_root_constituent: when true, label index 0 is heavily penalised
+            on the span covering the whole sentence.
+        reduction: one of "none", "mean" or "sum".
+
+    Raises:
+        ValueError: if `reduction` is not one of the accepted values.
+    """
+
+    def __init__(self, force_root_constituent=True, reduction="mean"):
+        super().__init__()
+        self.force_root_constituent = force_root_constituent
+        allowed_reductions = ("none", "mean", "sum")
+        if not (reduction in allowed_reductions):
+            raise ValueError(f"Invalid value for reduction: {reduction}")
+        self.reduction = reduction
+
+    def forward(self, logits, labels):
+        """Compute the loss for a batch.
+
+        Args:
+            logits: span scores whose last dimension indexes labels.
+            labels: integer gold charts in which -100 marks padding; negative
+                entries are treated as label 0 when building the gold mask.
+
+        Returns:
+            Per-example losses for reduction "none", otherwise their mean or
+            sum.
+        """
+        num_labels = logits.shape[-1]
+        is_gold = F.one_hot(F.relu(labels), num_classes=num_labels)
+
+        # Sentence length = number of non-padding cells in the first chart row.
+        lengths = (labels[:, 0, :] != -100).sum(-1)
+
+        # Shift so that label index 0 scores zero in every cell.
+        shifted = logits - logits[..., :1]
+
+        # Every non-gold (cell, label) pair gets a margin of 1.
+        margin = (1 - is_gold).to(torch.float)
+        if self.force_root_constituent:
+            batch_index = torch.arange(margin.shape[0])
+            margin[batch_index, 0, lengths - 1, 0] -= 1e9
+
+        best_tree_score = torch_struct.TreeCRF(shifted + margin, lengths=lengths).max
+        gold_tree_score = (shifted * is_gold).sum((1, 2, 3))
+        margin_losses = F.relu(best_tree_score - gold_tree_score)
+
+        if self.reduction == "none":
+            return margin_losses
+        if self.reduction == "mean":
+            return margin_losses.mean()
+        if self.reduction == "sum":
+            return margin_losses.sum()
+        assert False, f"Unexpected reduction: {self.reduction}"
diff --git a/parsing/src/benepar/decode_chart.py~ b/parsing/src/benepar/decode_chart.py~
new file mode 100644
index 0000000000000000000000000000000000000000..8d32ed1bdbe3bef17f509ceffdd1138267a36b0e
--- /dev/null
+++ b/parsing/src/benepar/decode_chart.py~
@@ -0,0 +1,291 @@
+"""
+Parsing formulated as span classification (https://arxiv.org/abs/1705.03919)
+"""
+
+import nltk
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch_struct
+
+from .parse_base import CompressedParserOutput
+
+
+def pad_charts(charts, padding_value=-100):
+    """Stack variable-length charts into one (batch, max_len, max_len) tensor padded with `padding_value`."""
+    batch_size = len(charts)
+    max_len = max(chart.shape[0] for chart in charts)  # max() raises ValueError when `charts` is empty
+    padded_charts = torch.full(
+        (batch_size, max_len, max_len),
+        padding_value,
+        dtype=charts[0].dtype,  # dtype and device are taken from the first chart
+        device=charts[0].device,
+    )
+    for i, chart in enumerate(charts):
+        chart_size = chart.shape[0]  # shape[0] is used for both axes, i.e. charts are treated as square
+        padded_charts[i, :chart_size, :chart_size] = chart  # copy into the top-left corner; the rest stays padded
+    return padded_charts
+
+
+def collapse_unary_strip_pos(tree, strip_top=True):
+    """Rebuild `tree` without part-of-speech nodes and with unary chains joined by "::"."""
+    # strip_pos() replaces every node that has a single string child by that string.
+    def strip_pos(tree):
+        if len(tree) == 1 and isinstance(tree[0], str):
+            return tree[0]
+        else:
+            return nltk.tree.Tree(tree.label(), [strip_pos(child) for child in tree])
+    # The tree returned by strip_pos() is then collapsed in place.
+    collapsed_tree = strip_pos(tree)
+    collapsed_tree.collapse_unary(collapsePOS=True, joinChar="::")
+    if collapsed_tree.label() in ("TOP", "ROOT", "S1", "VROOT"):  # root labels that get special treatment
+        if strip_top:
+            if len(collapsed_tree) == 1:
+                collapsed_tree = collapsed_tree[0]  # drop the root node and keep its only child
+            else:
+                collapsed_tree.set_label("")  # several children: keep the node but blank its label
+        elif len(collapsed_tree) == 1:
+            collapsed_tree[0].set_label(  # keep the root label by prefixing it to the only child's label
+                collapsed_tree.label() + "::" + collapsed_tree[0].label())
+            collapsed_tree = collapsed_tree[0]
+    return collapsed_tree
+
+
+def _get_labeled_spans(tree, spans_out, start):  # appends to spans_out; returns the index just past this subtree
+    if isinstance(tree, str):
+        return start + 1  # a leaf (word) occupies exactly one position
+    # Every remaining node must have several children or one word child, i.e. no unary chains are left.
+    assert len(tree) > 1 or isinstance(
+        tree[0], str
+    ), "Must call collapse_unary_strip_pos first"
+    end = start
+    for child in tree:
+        end = _get_labeled_spans(child, spans_out, end)  # children are visited first, so spans come out in post-order
+    # Spans are returned as closed intervals on both ends
+    spans_out.append((start, end - 1, tree.label()))
+    return end
+
+
+def get_labeled_spans(tree):
+    """Converts a tree into a list of labeled spans.
+
+    Args:
+        tree: an nltk.tree.Tree object
+
+    Returns:
+        A list of (span_start, span_end, span_label) tuples. The start and end
+        indices indicate the first and last words of the span (a closed
+        interval). Unary chains are collapsed, so e.g. a (S (VP ...)) will
+        result in a single span labeled "S::VP".
+    """
+    tree = collapse_unary_strip_pos(tree)  # called with the default strip_top=True
+    spans_out = []
+    _get_labeled_spans(tree, spans_out, start=0)  # fills spans_out in place; its return value is not needed
+    return spans_out
+
+
+def uncollapse_unary(tree, ensure_top=False):
+    """Expand every "::"-joined label back into a chain of nested single-child nodes."""
+    if isinstance(tree, str):
+        return tree  # leaves (words) are returned unchanged
+    else:
+        labels = tree.label().split("::")
+        if ensure_top and labels[0] != "TOP":
+            labels = ["TOP"] + labels  # make "TOP" the outermost label of this node
+        children = []
+        for child in tree:
+            child = uncollapse_unary(child)  # ensure_top is not forwarded, so it only affects the node passed in
+            children.append(child)
+        for label in labels[::-1]:  # wrap from the innermost label outwards
+            children = [nltk.tree.Tree(label, children)]
+        return children[0]
+
+
+class ChartDecoder:
+    """A chart decoder for parsing formulated as span classification."""
+
+    def __init__(self, label_vocab, force_root_constituent=True):
+        """Constructs a new ChartDecoder object.
+        Args:
+            label_vocab: A mapping from span labels to integer indices.
+        """
+        self.label_vocab = label_vocab
+        self.label_from_index = {i: label for label, i in label_vocab.items()}  # inverse mapping: index -> label
+        self.force_root_constituent = force_root_constituent  # if True, decoding never gives the full span label 0
+
+    @staticmethod
+    def build_vocab(trees):
+        label_set = set()
+        for tree in trees:
+            for _, _, label in get_labeled_spans(tree):
+                if label:
+                    label_set.add(label)
+        label_set = [""] + sorted(label_set)  # the empty label always gets index 0; the rest are sorted
+        return {label: i for i, label in enumerate(label_set)}
+    
+    @staticmethod
+    def infer_force_root_constituent(trees):
+        for tree in trees:
+            for _, _, label in get_labeled_spans(tree):
+                if not label:  # any span with an empty label means a labeled root cannot be required
+                    return False
+        return True
+
+    def chart_from_tree(self, tree):
+        spans = get_labeled_spans(tree)
+        num_words = len(tree.leaves())
+        chart = np.full((num_words, num_words), -100, dtype=int)
+        chart = np.tril(chart, -1)  # keeps only the part strictly below the diagonal, zeroing everything else
+        # Now all invalid entries are filled with -100, and valid entries with 0
+        for start, end, label in spans:
+            # Previously unseen unary chains can occur in the dev/test sets.
+            # For now, we ignore them and don't mark the corresponding chart
+            # entry as a constituent.
+            if label in self.label_vocab:
+                chart[start, end] = self.label_vocab[label]
+        return chart
+
+    def charts_from_pytorch_scores_batched(self, scores, lengths):
+        """Runs CKY to recover span labels from scores (e.g. logits).
+
+        This method uses pytorch-struct to speed up decoding compared to the
+        pure-Python implementation of CKY used by tree_from_scores().
+
+        Args:
+            scores: a pytorch tensor of shape (batch size, max length,
+                max length, label vocab size).
+            lengths: a pytorch tensor of shape (batch size,)
+
+        Returns:
+            A list of numpy arrays, each of shape (sentence length, sentence
+                length).
+        """
+        scores = scores.detach()
+        scores = scores - scores[..., :1]  # new tensor, so the in-place edit below leaves the caller's scores intact
+        if self.force_root_constituent:
+            scores[torch.arange(scores.shape[0]), 0, lengths - 1, 0] -= 1e9  # forbid label 0 on the full-sentence span
+        dist = torch_struct.TreeCRF(scores, lengths=lengths)
+        amax = dist.argmax  # NOTE(review): presumably a 0/1 indicator over (span, label) for the best tree -- confirm
+        amax[..., 0] += 1e-9  # tiny bias so the argmax below yields label 0 wherever no other label is set
+        padded_charts = amax.argmax(-1)
+        padded_charts = padded_charts.detach().cpu().numpy()
+        return [
+            chart[:length, :length] for chart, length in zip(padded_charts, lengths)  # cut each chart to its length
+        ]
+
+    def compressed_output_from_chart(self, chart):
+        chart_with_filled_diagonal = chart.copy()  # work on a copy; `chart` itself is still read for the labels
+        np.fill_diagonal(chart_with_filled_diagonal, 1)  # make every single-word span count as present
+        chart_with_filled_diagonal[0, -1] = 1  # make the full-sentence span count as present
+        starts, inclusive_ends = np.where(chart_with_filled_diagonal)  # NOTE(review): would also pick -100 entries
+        preorder_sort = np.lexsort((-inclusive_ends, starts))  # order by start ascending, then by end descending
+        starts = starts[preorder_sort]
+        inclusive_ends = inclusive_ends[preorder_sort]
+        labels = chart[starts, inclusive_ends]  # labels come from the unmodified chart, not from the copy
+        ends = inclusive_ends + 1  # convert the inclusive end index to an exclusive one
+        return CompressedParserOutput(starts=starts, ends=ends, labels=labels)
+
+    def tree_from_chart(self, chart, leaves):
+        compressed_output = self.compressed_output_from_chart(chart)
+        return compressed_output.to_tree(leaves, self.label_from_index)
+
+    def tree_from_scores(self, scores, leaves):
+        """Runs CKY to decode a tree from scores (e.g. logits).
+
+        If speed is important, consider using charts_from_pytorch_scores_batched
+        followed by compressed_output_from_chart or tree_from_chart instead.
+
+        Args:
+            scores: a chart of scores (or logits) of shape
+                (sentence length, sentence length, label vocab size). The first
+                two dimensions may be padded to a longer length, but all padded
+                values will be ignored.
+            leaves: the leaf nodes to use in the constructed tree. These
+                may be of type str or nltk.Tree, or (word, tag) tuples that
+                will be used to construct the leaf node objects.
+
+        Returns:
+            An nltk.Tree object.
+        """
+        leaves = [
+            nltk.Tree(node[1], [node[0]]) if isinstance(node, tuple) else node
+            for node in leaves
+        ]
+
+        chart = {}  # (left, right) with exclusive right -> (list of subtrees, best score)
+        scores = scores - scores[:, :, 0, None]  # shift so that label 0 scores 0 at every span
+        for length in range(1, len(leaves) + 1):  # shorter spans first, so sub-spans are ready when needed
+            for left in range(0, len(leaves) + 1 - length):
+                right = left + length
+                # `scores` is indexed with an inclusive end, hence right - 1.
+                label_scores = scores[left, right - 1]
+                label_scores = label_scores - label_scores[0]  # label 0 already scores 0 after the shift above
+
+                argmax_label_index = int(
+                    label_scores.argmax()
+                    if length < len(leaves) or not self.force_root_constituent
+                    else label_scores[1:].argmax() + 1  # full span: skip label 0 and re-add the offset
+                )
+                argmax_label = self.label_from_index[argmax_label_index]
+                label = argmax_label
+                label_score = label_scores[argmax_label_index]
+                # Single-word spans have no split point: store the leaf, wrapped if it received a label.
+                if length == 1:
+                    tree = leaves[left]
+                    if label:
+                        tree = nltk.tree.Tree(label, [tree])
+                    chart[left, right] = [tree], label_score
+                    continue
+                # Choose the split whose two sub-spans have the highest combined score.
+                best_split = max(
+                    range(left + 1, right),
+                    key=lambda split: (chart[left, split][1] + chart[split, right][1]),
+                )
+
+                left_trees, left_score = chart[left, best_split]
+                right_trees, right_score = chart[best_split, right]
+
+                children = left_trees + right_trees
+                if label:  # an empty label adds no node: the children are passed up as a flat list
+                    children = [nltk.tree.Tree(label, children)]
+
+                chart[left, right] = (children, label_score + left_score + right_score)
+
+        children, score = chart[0, len(leaves)]
+        tree = nltk.tree.Tree("TOP", children)  # the decoded subtrees are always placed under a "TOP" node
+        tree = uncollapse_unary(tree)
+        return tree
+
+
+class SpanClassificationMarginLoss(nn.Module):  # hinge loss: best cost-augmented tree score vs. gold chart score
+    def __init__(self, force_root_constituent=True, reduction="mean"):
+        super().__init__()
+        self.force_root_constituent = force_root_constituent
+        if reduction not in ("none", "mean", "sum"):  # reject unknown modes here so forward() never hits its fallback
+            raise ValueError(f"Invalid value for reduction: {reduction}")
+        self.reduction = reduction
+
+    def forward(self, logits, labels):
+        gold_event = F.one_hot(F.relu(labels), num_classes=logits.shape[-1])
+        # relu() clamps negative label ids (the -100 entries tested below) to 0 before one-hot encoding.
+        logits = logits - logits[..., :1]  # shift scores so that label 0 scores exactly 0 at every span
+        lengths = (labels[:, 0, :] != -100).sum(-1)  # per item: count of non -100 entries in chart row 0
+        augment = (1 - gold_event).to(torch.float)  # cost of 1 for every (span, label) that is not gold
+        if self.force_root_constituent:
+            augment[torch.arange(augment.shape[0]), 0, lengths - 1, 0] -= 1e9  # rule out label 0 on span (0, length - 1)
+        dist = torch_struct.TreeCRF(logits + augment, lengths=lengths)
+        # NOTE(review): dist.max is presumably the best tree score under the augmented scores -- confirm in torch_struct.
+        pred_score = dist.max
+        gold_score = (logits * gold_event).sum((1, 2, 3))  # shifted logits summed at the gold labels, one value per item
+        # Hinge: the loss is zero once the gold score reaches the cost-augmented best score.
+        margin_losses = F.relu(pred_score - gold_score)
+
+        if self.reduction == "none":
+            return margin_losses
+        elif self.reduction == "mean":
+            return margin_losses.mean()
+        elif self.reduction == "sum":
+            return margin_losses.sum()
+        else:
+            assert False, f"Unexpected reduction: {self.reduction}"
diff --git a/parsing/src/benepar/integrations/__init__.py b/parsing/src/benepar/integrations/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/parsing/src/benepar/integrations/__pycache__/__init__.cpython-310.pyc b/parsing/src/benepar/integrations/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..472b8f490320e3043a0516e7b31f60831e7d940d
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/__init__.cpython-310.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/__init__.cpython-37.pyc b/parsing/src/benepar/integrations/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3982b477e04fd204a3b37ed2cfc84c310c1e48d2
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/__init__.cpython-37.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/__init__.cpython-38.pyc b/parsing/src/benepar/integrations/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a7a12813040a5c58aaefae1698f6f3ab25d4fa5e
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/__init__.cpython-38.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/downloader.cpython-310.pyc b/parsing/src/benepar/integrations/__pycache__/downloader.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9b8c58a99ef65a47d79a1699bb00b08c3f4dce05
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/downloader.cpython-310.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/downloader.cpython-37.pyc b/parsing/src/benepar/integrations/__pycache__/downloader.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..31cf8a2b28a8eb77f6b70f2d65f75c9f542cf668
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/downloader.cpython-37.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/downloader.cpython-38.pyc b/parsing/src/benepar/integrations/__pycache__/downloader.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..07367b4c39ea7ac42d907255ea32487c7cadceff
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/downloader.cpython-38.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/nltk_plugin.cpython-310.pyc b/parsing/src/benepar/integrations/__pycache__/nltk_plugin.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a1ce1b4bfa1f0673955e35059a6c9fe9ae0213c6
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/nltk_plugin.cpython-310.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/nltk_plugin.cpython-37.pyc b/parsing/src/benepar/integrations/__pycache__/nltk_plugin.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..43ef2f670994fc2e3218226531e128894aefc5ec
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/nltk_plugin.cpython-37.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/nltk_plugin.cpython-38.pyc b/parsing/src/benepar/integrations/__pycache__/nltk_plugin.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1dfb61420e1dc1c6e664e6e8607c1d8a6793df0f
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/nltk_plugin.cpython-38.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/spacy_extensions.cpython-310.pyc b/parsing/src/benepar/integrations/__pycache__/spacy_extensions.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e11db5a43e08b4f6c74587b0e6a10f3da3d7c44b
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/spacy_extensions.cpython-310.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/spacy_extensions.cpython-37.pyc b/parsing/src/benepar/integrations/__pycache__/spacy_extensions.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9b1bcb9c6f7746983ae772c32e7d04f794a69faf
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/spacy_extensions.cpython-37.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/spacy_extensions.cpython-38.pyc b/parsing/src/benepar/integrations/__pycache__/spacy_extensions.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2e56376b5428a0226c1ba5f35172c9850c2bd2ae
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/spacy_extensions.cpython-38.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/spacy_plugin.cpython-310.pyc b/parsing/src/benepar/integrations/__pycache__/spacy_plugin.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7eccc195c49c47f56f863d6baa52b1763a87f935
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/spacy_plugin.cpython-310.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/spacy_plugin.cpython-37.pyc b/parsing/src/benepar/integrations/__pycache__/spacy_plugin.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2152c134b41ead0535fcbeb883261b6fc7fc8d5b
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/spacy_plugin.cpython-37.pyc differ
diff --git a/parsing/src/benepar/integrations/__pycache__/spacy_plugin.cpython-38.pyc b/parsing/src/benepar/integrations/__pycache__/spacy_plugin.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e87bddf3b2b5a0fd7e8ecc3d3bd4d78c4ef63f5a
Binary files /dev/null and b/parsing/src/benepar/integrations/__pycache__/spacy_plugin.cpython-38.pyc differ
diff --git a/parsing/src/benepar/integrations/downloader.py b/parsing/src/benepar/integrations/downloader.py
new file mode 100644
index 0000000000000000000000000000000000000000..019aa4e286fcce338659bab0894068512d5f71ce
--- /dev/null
+++ b/parsing/src/benepar/integrations/downloader.py
@@ -0,0 +1,35 @@
+import os
+
+BENEPAR_SERVER_INDEX = "https://kitaev.com/benepar/index.xml"  # index URL handed to the NLTK downloader below
+
+_downloader = None  # module-level cache, filled on the first get_downloader() call
+def get_downloader():
+    global _downloader
+    if _downloader is None:
+        import nltk.downloader  # imported only when a downloader is first requested
+        _downloader = nltk.downloader.Downloader(server_index_url=BENEPAR_SERVER_INDEX)
+    return _downloader
+
+def download(*args, **kwargs):  # forwards all arguments to the shared downloader's download()
+    return get_downloader().download(*args, **kwargs)
+
+def locate_model(name):  # returns a path for `name`, or raises LookupError
+    if os.path.exists(name):  # an existing filesystem path is returned unchanged
+        return name
+    elif "/" not in name and "." not in name:  # a bare name is looked up as an NLTK resource under models/
+        import nltk.data
+        try:
+            nltk_loc = nltk.data.find(f"models/{name}")
+            return nltk_loc.path
+        except LookupError as e:
+            arg = e.args[0].replace("nltk.download", "benepar.download")  # point the hint at benepar's downloader
+        # Raised after the except block has ended, with the rewritten message.
+        raise LookupError(arg)
+    # Reached when `name` does not exist as a path and contains "/" or ".".
+    raise LookupError("Can't find {}".format(name))
+
+def load_trained_model(model_name_or_path):  # resolve the model location, then load it as a ChartParser
+    model_path = locate_model(model_name_or_path)  # raises LookupError when the model cannot be found
+    from ..parse_chart import ChartParser  # imported at call time rather than at module import
+    parser = ChartParser.from_trained(model_path)
+    return parser
diff --git a/parsing/src/benepar/integrations/nltk_plugin.py b/parsing/src/benepar/integrations/nltk_plugin.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7a454d79eed45700c0882779cc62d5a0190c5b6
--- /dev/null
+++ b/parsing/src/benepar/integrations/nltk_plugin.py
@@ -0,0 +1,279 @@
+import dataclasses
+import itertools
+from typing import List, Optional, Tuple
+
+import nltk
+import torch
+
+from .downloader import load_trained_model
+from ..parse_base import BaseParser, BaseInputExample
+from ..ptb_unescape import ptb_unescape, guess_space_after
+
+
+TOKENIZER_LOOKUP = {  # language code -> language name passed to nltk.sent_tokenize / nltk.word_tokenize
+    "en": "english",
+    "de": "german",
+    "fr": "french",
+    "pl": "polish",
+    "sv": "swedish",
+}
+
+LANGUAGE_GUESS = {  # language code -> labels that must all be in a label vocabulary for guess_language()
+    "ar": ("X", "XP", "WHADVP", "WHNP", "WHPP"),
+    "zh": ("VSB", "VRD", "VPT", "VNV"),
+    "en": ("WHNP", "WHADJP", "SINV", "SQ"),
+    "de": ("AA", "AP", "CCP", "CH", "CNP", "VZ"),
+    "fr": ("P+", "P+D+", "PRO+", "PROREL+"),
+    "he": ("PREDP", "SYN_REL", "SYN_yyDOT"),
+    "pl": ("formaczas", "znakkonca"),
+    "sv": ("PSEUDO", "AVP", "XP"),
+}
+
+
+def guess_language(label_vocab):
+    """Guess parser language based on its syntactic label inventory.
+
+    The parser training scripts are designed to accept arbitrary input tree
+    files with minimal language-specific behavior, but at inference time we may
+    need to know the language identity in order to invoke other pipeline
+    elements, such as tokenizers.
+    """
+    for language, required_labels in LANGUAGE_GUESS.items():  # checked in dict order; the first match wins
+        if all(label in label_vocab for label in required_labels):
+            return language
+    return None  # no entry had all of its required labels in label_vocab
+
+
+@dataclasses.dataclass
+class InputSentence(BaseInputExample):
+    """Parser input for a single sentence.
+
+    At least one of `words` and `escaped_words` is required for each input
+    sentence. The remaining fields are optional: the parser will attempt to
+    derive the value for any missing fields using the fields that are provided.
+
+    `words` and `space_after` together form a reversible tokenization of the
+    input text: they represent, respectively, the Unicode text for each word and
+    an indicator for whether the word is followed by whitespace. These are used
+    as inputs by the parser.
+
+    `tags` is a list of part-of-speech tags, if available prior to running the
+    parser. The parser does not actually use these tags as input, but it will
+    pass them through to its output. If `tags` is None, the parser will perform
+    its own part of speech tagging (if the parser was not trained to also do
+    tagging, "UNK" part-of-speech tags will be used in the output instead).
+
+    `escaped_words` are the representations of each leaf to use in the output
+    tree. If `words` is provided, `escaped_words` will not be used by the neural
+    network portion of the parser, and will only be incorporated when
+    constructing the output tree. Therefore, `escaped_words` may be used to
+    accommodate any dataset-specific text encoding, such as transliteration.
+
+    Here is an example of the differences between these fields for English PTB:
+        (raw text):     "Fly safely."
+        words:          "       Fly     safely  .       "
+        space_after:    False   True    False   False   False
+        tags:           ``      VB      RB      .       ''
+        escaped_words:  ``      Fly     safely  .       ''
+    """
+
+    words: Optional[List[str]] = None
+    space_after: Optional[List[bool]] = None
+    tags: Optional[List[str]] = None
+    escaped_words: Optional[List[str]] = None
+
+    @property
+    def tree(self):
+        return None  # this input type never carries a tree
+
+    def leaves(self):
+        return self.escaped_words  # stays None until escaped_words has been supplied or filled in
+
+    def pos(self):
+        if self.tags is not None:
+            return list(zip(self.escaped_words, self.tags))  # zip() stops at the shorter list if the lengths differ
+        else:
+            return [(word, "UNK") for word in self.escaped_words]  # placeholder tag when no tags were given
+
+
+class Parser:
+    """Berkeley Neural Parser (benepar), integrated with NLTK.
+
+    Use this class to apply the Berkeley Neural Parser to pre-tokenized datasets
+    and treebanks, or when integrating the parser into an NLP pipeline that
+    already performs tokenization, sentence splitting, and (optionally)
+    part-of-speech tagging. For parsing starting with raw text, it is strongly
+    encouraged that you use spaCy and benepar.BeneparComponent instead.
+
+    Sample usage:
+    >>> parser = benepar.Parser("benepar_en3")
+    >>> input_sentence = benepar.InputSentence(
+        words=['"', 'Fly', 'safely', '.', '"'],
+        space_after=[False, True, False, False, False],
+        tags=['``', 'VB', 'RB', '.', "''"],
+        escaped_words=['``', 'Fly', 'safely', '.', "''"],
+    )
+    >>> parser.parse(input_sentence)
+
+    Not all fields of benepar.InputSentence are required, but at least one of
+    `words` and `escaped_words` must not be None. The parser will attempt to
+    guess the value for missing fields. For example,
+    >>> input_sentence = benepar.InputSentence(
+        words=['"', 'Fly', 'safely', '.', '"'],
+    )
+    >>> parser.parse(input_sentence)
+
+    Although this class is primarily designed for use with data that has already
+    been tokenized, to help with interactive use and debugging it also accepts
+    simple text string inputs. However, using this class to parse from raw text
+    is STRONGLY DISCOURAGED for any application where parsing accuracy matters.
+    When parsing from raw text, use spaCy and benepar.BeneparComponent instead.
+    The reason is that parser models do not ship with a tokenizer or sentence
+    splitter, and some models may not include a part-of-speech tagger either. A
+    toolkit must be used to fill in these pipeline components, and spaCy
+    outperforms NLTK in all of these areas (sometimes by a large margin).
+    >>> parser.parse('"Fly safely."')  # For debugging/interactive use only.
+    """
+
+    def __init__(self, name, batch_size=64, language_code=None):
+        """Load a trained parser model.
+
+        Args:
+            name (str): Model name, or path to pytorch saved model
+            batch_size (int): Maximum number of sentences to process per batch
+            language_code (str, optional): language code for the parser (e.g.
+                'en', 'he', 'zh', etc). Our official trained models will set
+                this automatically, so this argument is only needed if training
+                on new languages or treebanks.
+        """
+        self._parser = load_trained_model(name)
+        if torch.cuda.is_available():  # the model is moved to the GPU whenever CUDA is available
+            self._parser.cuda()
+        if language_code is not None:
+            self._language_code = language_code
+        else:
+            self._language_code = guess_language(self._parser.config["label_vocab"])  # may be None
+        self._tokenizer_lang = TOKENIZER_LOOKUP.get(self._language_code, None)  # None: raw-string input is rejected
+
+        self.batch_size = batch_size
+
+    def parse(self, sentence):
+        """Parse a single sentence
+
+        Args:
+            sentence (InputSentence or List[str] or str): Sentence to parse.
+                If the input is of List[str], it is assumed to be a sequence of
+                words and will behave the same as only setting the `words` field
+                of InputSentence. If the input is of type str, the sentence will
+                be tokenized using the default NLTK tokenizer (not recommended:
+                if parsing from raw text, use spaCy and benepar.BeneparComponent
+                instead).
+
+        Returns:
+            nltk.Tree
+        """
+        return list(self.parse_sents([sentence]))[0]  # wrapped in a list so a str counts as one sentence
+
+    def parse_sents(self, sents):
+        """Parse multiple sentences in batches.
+
+        Args:
+            sents (Iterable[InputSentence]): An iterable of sentences to be
+                parsed. `sents` may also be a string, in which case it will be
+                segmented into sentences using the default NLTK sentence
+                splitter (not recommended: if parsing from raw text, use spaCy
+                and benepar.BeneparComponent instead). Otherwise, each element
+                of `sents` will be treated as a sentence. The elements of
+                `sents` may also be List[str] or str: see Parser.parse() for
+                documentation regarding these cases.
+
+        Yields:
+            nltk.Tree objects, one per input sentence.
+        """
+        if isinstance(sents, str):
+            if self._tokenizer_lang is None:
+                raise ValueError(
+                    "No tokenizer available for this language. "
+                    "Please split into individual sentences and tokens "
+                    "before calling the parser."
+                )
+            sents = nltk.sent_tokenize(sents, self._tokenizer_lang)
+        # One shared iterator repeated batch_size times yields tuples of batch_size; the last is padded.
+        end_sentinel = object()
+        for batch_sents in itertools.zip_longest(
+            *([iter(sents)] * self.batch_size), fillvalue=end_sentinel
+        ):
+            batch_inputs = []
+            for sent in batch_sents:
+                if sent is end_sentinel:  # padding begins here, so the real sentences of this batch are done
+                    break
+                elif isinstance(sent, str):
+                    if self._tokenizer_lang is None:
+                        raise ValueError(
+                            "No word tokenizer available for this language. "
+                            "Please tokenize before calling the parser."
+                        )
+                    escaped_words = nltk.word_tokenize(sent, self._tokenizer_lang)
+                    sent = InputSentence(escaped_words=escaped_words)  # tokenizer output is stored as escaped_words
+                elif isinstance(sent, (list, tuple)):
+                    sent = InputSentence(words=sent)
+                elif not isinstance(sent, InputSentence):
+                    raise ValueError(
+                        "Sentences must be one of: InputSentence, list, tuple, or str"
+                    )
+                batch_inputs.append(self._with_missing_fields_filled(sent))
+            # Parse the whole batch in one call and pair each output with its input.
+            for inp, output in zip(
+                batch_inputs, self._parser.parse(batch_inputs, return_compressed=True)
+            ):
+                # If pos tags are provided as input, ignore any tags predicted
+                # by the parser.
+                if inp.tags is not None:
+                    output = output.without_predicted_tags()
+                yield output.to_tree(
+                    inp.pos(),
+                    self._parser.decoder.label_from_index,
+                    self._parser.tag_from_index,
+                )
+
+    def _with_missing_fields_filled(self, sent):  # returns `sent`, or a copy with missing fields derived
+        if not isinstance(sent, InputSentence):
+            raise ValueError("Input is not an instance of InputSentence")
+        if sent.words is None and sent.escaped_words is None:
+            raise ValueError("At least one of words or escaped_words is required")
+        elif sent.words is None:
+            sent = dataclasses.replace(sent, words=ptb_unescape(sent.escaped_words))  # derive words by unescaping
+        elif sent.escaped_words is None:
+            escaped_words = [  # derive escaped_words by replacing each bracket character with its token
+                word.replace("(", "-LRB-")
+                .replace(")", "-RRB-")
+                .replace("{", "-LCB-")
+                .replace("}", "-RCB-")
+                .replace("[", "-LSB-")
+                .replace("]", "-RSB-")
+                for word in sent.words
+            ]
+            sent = dataclasses.replace(sent, escaped_words=escaped_words)
+        else:
+            if len(sent.words) != len(sent.escaped_words):  # both given: they must describe the same tokens
+                raise ValueError(
+                    f"Length of words ({len(sent.words)}) does not match "
+                    f"escaped_words ({len(sent.escaped_words)})"
+                )
+        # space_after: derived per language when missing, otherwise only its length is validated.
+        if sent.space_after is None:
+            if self._language_code == "zh":
+                space_after = [False for _ in sent.words]  # no token is followed by a space
+            elif self._language_code in ("ar", "he"):
+                space_after = [True for _ in sent.words]  # every token is followed by a space
+            else:
+                space_after = guess_space_after(sent.words)
+            sent = dataclasses.replace(sent, space_after=space_after)
+        elif len(sent.words) != len(sent.space_after):
+            raise ValueError(
+                f"Length of words ({len(sent.words)}) does not match "
+                f"space_after ({len(sent.space_after)})"
+            )
+        # NOTE(review): the length of `tags` is never checked; InputSentence.pos() would truncate via zip().
+        assert len(sent.words) == len(sent.escaped_words) == len(sent.space_after)
+        return sent
diff --git a/parsing/src/benepar/integrations/spacy_extensions.py b/parsing/src/benepar/integrations/spacy_extensions.py
new file mode 100644
index 0000000000000000000000000000000000000000..572dc45fa8371d97f758a39d213834ce33bed998
--- /dev/null
+++ b/parsing/src/benepar/integrations/spacy_extensions.py
@@ -0,0 +1,179 @@
+NOT_PARSED_SENTINEL = object()  # default of Doc._._constituent_data; get_constituent checks for it by identity
+
+
+class NonConstituentException(Exception):
+    """Raised by get_constituent when a span is not a constituent of the parse."""
+
+
+class ConstituentData:
+    """Parsed constituents of a whole document, stored as parallel sequences."""
+
+    def __init__(self, starts, ends, labels, loc_to_constituent, label_vocab):
+        # starts[k] / ends[k] / labels[k] all describe constituent number k.
+        self.starts, self.ends, self.labels = starts, ends, labels
+        self.loc_to_constituent, self.label_vocab = loc_to_constituent, label_vocab
+
+
+def get_constituent(span):
+    """Return (constituent_data, index) for the stored constituent matching `span`.
+
+    Raises Exception when the document carries no parse, and
+    NonConstituentException when the search finds no entry ending at span.end.
+    """
+    data = span.doc._._constituent_data
+    if data is NOT_PARSED_SENTINEL:
+        raise Exception(
+            "No constituency parse is available for this document."
+            " Consider adding a BeneparComponent to the pipeline."
+        )
+    # Candidates run from the entry recorded for span.start up to the entry
+    # recorded for the next token (or the end of the arrays for the last token).
+    first = data.loc_to_constituent[span.start]
+    has_next = span.start + 1 < len(data.loc_to_constituent)
+    stop = data.loc_to_constituent[span.start + 1] if has_next else len(data.ends)
+    for position in range(first, stop):
+        if data.ends[position] <= span.end:
+            if data.ends[position] == span.end:
+                return data, position
+            break
+    raise NonConstituentException("Span is not a constituent: {}".format(span))
+
+
+def get_labels(span):
+    # Resolve the constituent for `span`, then map its label id through label_vocab.
+    data, index = get_constituent(span)
+    return data.label_vocab[data.labels[index]]
+
+
+def parse_string(span):
+    """Render the parse of the constituent covering `span` as a bracketed string."""
+    constituent_data, position = get_constituent(span)
+    starts, ends = constituent_data.starts, constituent_data.ends
+    labels, label_vocab = constituent_data.labels, constituent_data.label_vocab
+    doc = span.doc
+
+    # Bracket characters are written out as these escape tokens.
+    escapes = (("(", "-LRB-"), (")", "-RRB-"), ("{", "-LCB-"),
+               ("}", "-RCB-"), ("[", "-LSB-"), ("]", "-RSB-"))
+
+    # Cursor shared by all recursive calls: each make_str() call advances it by
+    # one entry, so nested calls consume a subtree's entries in stored order.
+    idx = position - 1
+
+    def make_str():
+        nonlocal idx
+        idx += 1
+        i, j = starts[idx], ends[idx]
+        label = label_vocab[labels[idx]]
+        if (i + 1) >= j:
+            # Single token: "(TAG text)", with brackets in tag/text escaped.
+            token = doc[i]
+            leaf = u"{} {}".format(token.tag_, token.text)
+            for raw, escaped in escapes:
+                leaf = leaf.replace(raw, escaped)
+            s = "(" + leaf + ")"
+        else:
+            # Recurse into each following entry that lies within [i, j].
+            children = []
+            while (
+                (idx + 1) < len(starts)
+                and i <= starts[idx + 1]
+                and ends[idx + 1] <= j
+            ):
+                children.append(make_str())
+
+            s = u" ".join(children)
+
+        # Wrap the text in one "(sublabel ...)" layer per sublabel, last one innermost.
+        for sublabel in reversed(label):
+            s = u"({} {})".format(sublabel, s)
+        return s
+
+    return make_str()
+
+
+def get_subconstituents(span):
+    """Yield every constituent inside `span` (the span itself first) as Doc slices."""
+    constituent_data, position = get_constituent(span)
+    doc = span.doc
+
+    while position < len(constituent_data.starts):
+        start = constituent_data.starts[position]
+        end = constituent_data.ends[position]
+
+        if span.end <= start or span.end < end:
+            break  # this entry starts at/after span.end or reaches beyond it
+
+        yield doc[start:end]
+        position += 1
+
+
+def get_child_spans(span):
+    """Yield the direct child constituents of `span`, left to right, as Doc slices."""
+    constituent_data, position = get_constituent(span)
+    doc = span.doc
+
+    child_start_expected = span.start
+    position += 1
+    while position < len(constituent_data.starts):
+        start = constituent_data.starts[position]
+        end = constituent_data.ends[position]
+
+        if span.end <= start or span.end < end:
+            break
+
+        if start == child_start_expected:  # entries starting elsewhere are skipped
+            yield doc[start:end]
+            child_start_expected = end
+
+        position += 1
+
+
+def get_parent_span(span):
+    """Return the nearest earlier constituent that contains `span`, or None."""
+    constituent_data, position = get_constituent(span)
+    doc = span.doc
+    sent_start = span.sent.start  # resolve the sentence once, not on every iteration
+
+    position -= 1
+    while position >= 0:
+        start = constituent_data.starts[position]
+        end = constituent_data.ends[position]
+
+        if start <= span.start and span.end <= end:
+            return doc[start:end]
+        if end < sent_start:
+            break  # this entry ends before the span's sentence begins
+        position -= 1
+
+    return None
+
+
+def install_spacy_extensions():
+    """Register benepar's custom `._.` attributes on spaCy's Doc, Span and Token."""
+    from spacy.tokens import Doc, Span, Token
+
+    def as_span(token):
+        # One-token span, so the Token attributes can reuse the Span getters.
+        return token.doc[token.i : token.i + 1]
+
+    # None is not allowed as a default extension value!
+    Doc.set_extension("_constituent_data", default=NOT_PARSED_SENTINEL)
+
+    Span.set_extension("labels", getter=get_labels)
+    Span.set_extension("parse_string", getter=parse_string)
+    Span.set_extension("constituents", getter=get_subconstituents)
+    Span.set_extension("parent", getter=get_parent_span)
+    Span.set_extension("children", getter=get_child_spans)
+
+    Token.set_extension("labels", getter=lambda token: get_labels(as_span(token)))
+    Token.set_extension(
+        "parse_string", getter=lambda token: parse_string(as_span(token))
+    )
+    Token.set_extension("parent", getter=lambda token: get_parent_span(as_span(token)))
+
+
+try:
+    install_spacy_extensions()  # runs once, when this module is imported
+except ImportError:
+    pass  # the spaCy import inside the function failed; extensions stay unregistered
diff --git a/parsing/src/benepar/integrations/spacy_plugin.py b/parsing/src/benepar/integrations/spacy_plugin.py
new file mode 100644
index 0000000000000000000000000000000000000000..41ca8b6e41a6a3368a7c1d207a99704b68a82491
--- /dev/null
+++ b/parsing/src/benepar/integrations/spacy_plugin.py
@@ -0,0 +1,206 @@
+import numpy as np
+
+from .downloader import load_trained_model
+from ..parse_base import BaseParser, BaseInputExample
+from .spacy_extensions import ConstituentData, NonConstituentException
+
+import torch
+
+
+class PartialConstituentData:
+    """Collects per-sentence constituent arrays and merges them for a whole Doc."""
+
+    def __init__(self):
+        self.starts = [np.array([], dtype=int)]
+        self.ends = [np.array([], dtype=int)]
+        self.labels = [np.array([], dtype=int)]
+
+    def finalize(self, doc, label_vocab):
+        """Concatenate the collected arrays and index them by start position."""
+        self.starts = np.hstack(self.starts)
+        self.ends = np.hstack(self.ends)
+        self.labels = np.hstack(self.labels)
+        # TODO(nikita): Python for loops aren't very fast
+        loc_to_constituent = np.full(len(doc), -1, dtype=int)
+        prev = None
+        for position, start in enumerate(self.starts):
+            if start != prev:
+                prev = start
+                loc_to_constituent[start] = position
+        arrays = (self.starts, self.ends, self.labels)
+        return ConstituentData(*arrays, loc_to_constituent, label_vocab)
+
+
+class SentenceWrapper(BaseInputExample):
+    """Presents a spaCy sentence through the BaseInputExample interface."""
+    TEXT_NORMALIZATION_MAPPING = {
+        "`": "'",
+        "«": '"',
+        "»": '"',
+        "‘": "'",
+        "’": "'",
+        "“": '"',
+        "”": '"',
+        "„": '"',
+        "‹": "'",
+        "›": "'",
+        "—": "--",  # em dash
+    }
+
+    def __init__(self, spacy_sent):
+        self.sent = spacy_sent
+
+    @property
+    def words(self):
+        # A token is replaced only when its whole text is a key of the mapping.
+        normalize = self.TEXT_NORMALIZATION_MAPPING.get
+        return [normalize(tok.text, tok.text) for tok in self.sent]
+
+    @property
+    def space_after(self):
+        return [bool(tok.whitespace_) for tok in self.sent]
+
+    @property
+    def tree(self):
+        return None  # no parse tree is attached to a wrapped sentence
+
+    def leaves(self):
+        return self.words
+
+    def pos(self):
+        return [(word, "UNK") for word in self.words]  # placeholder tag for every word
+
+
+class BeneparComponent:
+    """
+    Berkeley Neural Parser (benepar) component for spaCy.
+
+    Sample usage:
+    >>> nlp = spacy.load('en_core_web_md')
+    >>> if spacy.__version__.startswith('2'):
+            nlp.add_pipe(BeneparComponent("benepar_en3"))
+        else:
+            nlp.add_pipe("benepar", config={"model": "benepar_en3"})
+    >>> doc = nlp("The quick brown fox jumps over the lazy dog.")
+    >>> sent = list(doc.sents)[0]
+    >>> print(sent._.parse_string)
+
+    This component is only responsible for constituency parsing and (for some
+    trained models) part-of-speech tagging. It should be preceded in the
+    pipeline by other components that can, at minimum, perform tokenization and
+    sentence segmentation.
+    """
+
+    name = "benepar"
+
+    def __init__(
+        self,
+        name,
+        subbatch_max_tokens=500,
+        disable_tagger=False,
+        batch_size="ignored",
+    ):
+        """Load a trained parser model.
+
+        Args:
+            name (str): Model name, or path to pytorch saved model
+            subbatch_max_tokens (int): Maximum number of tokens to process in
+                each batch
+            disable_tagger (bool, default False): Unless disabled, the parser
+                will set predicted part-of-speech tags for the document,
+                overwriting any existing tags provided by spaCy models or
+                previous pipeline steps. This option has no effect for parser
+                models that do not have a part-of-speech tagger built in.
+            batch_size: deprecated and ignored; use subbatch_max_tokens instead
+        """
+        self._parser = load_trained_model(name)
+        if torch.cuda.is_available():
+            self._parser.cuda()
+
+        self.subbatch_max_tokens = subbatch_max_tokens
+        self.disable_tagger = disable_tagger
+
+        # Invert label_vocab into a tuple indexed by label id; each entry is the
+        # tuple of sublabels obtained by splitting the label string on "::".
+        self._label_vocab = self._parser.config["label_vocab"]
+        label_vocab_size = max(self._label_vocab.values()) + 1
+        label_from_index = [()] * label_vocab_size
+        for label, i in self._label_vocab.items():
+            label_from_index[i] = tuple(label.split("::")) if label else ()
+        self._label_from_index = tuple(label_from_index)
+
+        if self.disable_tagger:
+            self._tag_from_index = None
+        else:
+            # Same inversion for tag_vocab; entries are the tag_vocab keys.
+            tag_vocab = self._parser.config["tag_vocab"]
+            tag_vocab_size = max(tag_vocab.values()) + 1
+            tag_from_index = [()] * tag_vocab_size
+            for tag, i in tag_vocab.items():
+                tag_from_index[i] = tag
+            self._tag_from_index = tuple(tag_from_index)
+
+    def __call__(self, doc):
+        """Update the input document with predicted constituency parses."""
+        # TODO(https://github.com/nikitakit/self-attentive-parser/issues/16): handle
+        # tokens that consist entirely of whitespace.
+        constituent_data = PartialConstituentData()
+        wrapped_sents = [SentenceWrapper(sent) for sent in doc.sents]
+        parses = self._parser.parse(
+            wrapped_sents,
+            return_compressed=True,
+            subbatch_max_tokens=self.subbatch_max_tokens,
+        )
+        for sent, parse in zip(doc.sents, parses):
+            # Shift each sentence's offsets by sent.start before storing them.
+            constituent_data.starts.append(parse.starts + sent.start)
+            constituent_data.ends.append(parse.ends + sent.start)
+            constituent_data.labels.append(parse.labels)
+
+            if parse.tags is not None and not self.disable_tagger:
+                # Overwrite each token's tag_ with the parser's prediction.
+                for i, tag_id in enumerate(parse.tags):
+                    sent[i].tag_ = self._tag_from_index[tag_id]
+
+        doc._._constituent_data = constituent_data.finalize(doc, self._label_from_index)
+        return doc
+
+
+def create_benepar_component(
+    nlp,
+    name,
+    model: str,
+    subbatch_max_tokens: int,
+    disable_tagger: bool,
+):
+    # Factory function registered for spaCy's "benepar" component; `nlp` and
+    # `name` are accepted but not used here.
+    options = dict(subbatch_max_tokens=subbatch_max_tokens, disable_tagger=disable_tagger)
+
+    return BeneparComponent(model, **options)
+
+
+def register_benepar_component_factory():
+    """Register the "benepar" pipeline factory with spaCy (does nothing on spaCy 2.x)."""
+    # Starting with spaCy 3.0, nlp.add_pipe no longer directly accepts
+    # BeneparComponent instances. We must instead register a component factory.
+    import spacy
+
+    if not spacy.__version__.startswith("2"):
+        from spacy.language import Language
+
+        defaults = {
+            "subbatch_max_tokens": 500,
+            "disable_tagger": False,
+        }
+        Language.factory(
+            "benepar",
+            default_config=defaults,
+            func=create_benepar_component,
+        )
+
+
+try:
+    register_benepar_component_factory()  # runs once, when this module is imported
+except ImportError:
+    pass  # a spaCy import inside the function failed; the factory stays unregistered
diff --git a/parsing/src/benepar/nkutil.py b/parsing/src/benepar/nkutil.py
new file mode 100644
index 0000000000000000000000000000000000000000..290ad20474d1406f9091aebbbbc960562c9075c1
--- /dev/null
+++ b/parsing/src/benepar/nkutil.py
@@ -0,0 +1,51 @@
+class HParams:
+    """Attribute bag of hyperparameters with dict-style access and argparse glue."""
+
+    _skip_keys = ["populate_arguments", "set_from_args", "print", "to_dict"]
+
+    def __init__(self, **kwargs):
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+
+    def _names(self):
+        # Hyperparameter names: every dir() entry that is neither underscore-
+        # prefixed nor one of the helper methods listed in _skip_keys.
+        return [k for k in dir(self) if not (k.startswith("_") or k in self._skip_keys)]
+
+    def __getitem__(self, item):
+        return getattr(self, item)
+
+    def __setitem__(self, item, value):
+        # Item assignment may only update attributes that already exist.
+        if not hasattr(self, item):
+            raise KeyError(f"Hyperparameter {item} has not been declared yet")
+        setattr(self, item, value)
+
+    def to_dict(self):
+        return {k: self[k] for k in self._names()}
+
+    def populate_arguments(self, parser):
+        """Add one command-line option per int/float/str/bool hyperparameter."""
+        for k in self._names():
+            v = self[k]
+            flag = k.replace("_", "-")
+            if type(v) in (int, float, str):
+                parser.add_argument(f"--{flag}", type=type(v), default=v)
+            elif isinstance(v, bool):
+                # True gets a "--no-<name>" switch, False gets a "--<name>" switch.
+                if v:
+                    parser.add_argument(f"--no-{flag}", action="store_false")
+                else:
+                    parser.add_argument(f"--{flag}", action="store_true")
+
+    def set_from_args(self, args):
+        """Copy values found on `args` (as <name> or no_<name>) onto this object."""
+        for k in self._names():
+            if hasattr(args, k):
+                self[k] = getattr(args, k)
+            elif hasattr(args, f"no_{k}"):
+                self[k] = getattr(args, f"no_{k}")
+
+    def print(self):
+        for k in self._names():
+            print(k, repr(self[k]))
diff --git a/parsing/src/benepar/parse_base.py b/parsing/src/benepar/parse_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..9be49169f6ed97148ba6d109c7512a3c0e5feb05
--- /dev/null
+++ b/parsing/src/benepar/parse_base.py
@@ -0,0 +1,216 @@
+from abc import ABC, abstractmethod
+import dataclasses
+from typing import Any, Iterable, List, Optional, Tuple, Union
+
+import nltk
+import numpy as np
+
+
+class BaseInputExample(ABC):
+    """Abstract interface for a single sentence of parser input."""
+
+    # Subclasses must define the following attributes or properties:
+    #   words       -- unicode representation of each word in the sentence
+    #   space_after -- one boolean per word: is there whitespace after it?
+    #   tree        -- optional gold parse tree
+    # Together, `words` and `space_after` should form a reversible tokenization
+    # of the raw text input.
+    # (Only annotations are declared here; no values are assigned.)
+    words: List[str]
+    space_after: List[bool]
+    tree: Optional[nltk.Tree]
+
+    @abstractmethod
+    def leaves(self) -> Optional[List[str]]:
+        """Return the leaves to use in the parse tree.
+
+        Unlike `words`, which must be raw unicode text, leaves follow whatever
+        is standard for the treebank: '(' in words might correspond to '-LRB-'
+        in leaves, and leaves might include other transformations such as
+        transliteration.
+        """
+
+    @abstractmethod
+    def pos(self) -> Optional[List[Tuple[str, str]]]:
+        """Return a list of (leaf, part-of-speech tag) tuples."""
+
+
+@dataclasses.dataclass
+class CompressedParserOutput:
+    """Parser output, encoded as a collection of numpy arrays.
+
+    By default, a parser will return nltk.Tree objects. These have much nicer
+    APIs than the CompressedParserOutput class, and the code involved is simpler
+    and more readable. As a trade-off, code dealing with nltk.Tree objects is
+    slower: the nltk.Tree type itself has some overhead, and algorithms dealing
+    with it are implemented in pure Python as opposed to C or even CUDA. The
+    CompressedParserOutput type is an alternative that has some optimizations
+    for the sole purpose of speeding up inference.
+
+    If trying a new parser type for research purposes, it's safe to ignore this
+    class and the return_compressed argument to parse(). If the parser works
+    well and is being released, the return_compressed argument can then be added
+    with a dedicated fast implementation, or simply by using the from_tree
+    method defined below.
+    """
+
+    # A parse tree is represented as a set of constituents. In the case of
+    # non-binary trees, only the labeled non-terminal nodes are included: there
+    # are no dummy nodes inserted for binarization purposes. However, single
+    # words are always included in the set of constituents, and they may have a
+    # null label if there is no phrasal category above the part-of-speech tag.
+    # All constituents are sorted according to pre-order traversal, and each has
+    # an associated start (the index of the first word in the constituent), end
+    # (1 + the index of the last word in the constituent), and label (index
+    # associated with an external label_vocab dictionary.) These are then stored
+    # in three numpy arrays:
+    starts: Iterable[int]  # Must be a numpy array
+    ends: Iterable[int]  # Must be a numpy array
+    labels: Iterable[int]  # Must be a numpy array
+
+    # Part of speech tag ids as output by the parser (may be None if the parser
+    # does not do POS tagging). These indices are associated with an external
+    # tag_vocab dictionary.
+    tags: Optional[Iterable[int]] = None  # Must be None or a numpy array
+
+    def without_predicted_tags(self):
+        return dataclasses.replace(self, tags=None)
+
+    def with_tags(self, tags):
+        return dataclasses.replace(self, tags=tags)
+
+    @classmethod
+    def from_tree(
+        cls, tree: nltk.Tree, label_vocab: dict, tag_vocab: Optional[dict] = None
+    ) -> "CompressedParserOutput":
+        """Encode `tree` as arrays; POS tags are included only when tag_vocab is given."""
+        num_words = len(tree.leaves())
+        starts = np.empty(2 * num_words, dtype=int)
+        ends = np.empty(2 * num_words, dtype=int)
+        labels = np.empty(2 * num_words, dtype=int)
+
+        def helper(tree, start, write_idx):
+            # Writes this subtree from write_idx on; returns (end, next free index).
+            label = []
+            while len(tree) == 1 and not isinstance(tree[0], str):
+                if tree.label() != "TOP":
+                    label.append(tree.label())
+                tree = tree[0]
+
+            if len(tree) == 1 and isinstance(tree[0], str):
+                starts[write_idx] = start
+                ends[write_idx] = start + 1
+                labels[write_idx] = label_vocab["::".join(label)]
+                return start + 1, write_idx + 1
+
+            label.append(tree.label())
+            starts[write_idx] = start
+            labels[write_idx] = label_vocab["::".join(label)]
+
+            end = start
+            new_write_idx = write_idx + 1
+            for child in tree:
+                end, new_write_idx = helper(child, end, new_write_idx)
+
+            ends[write_idx] = end
+            return end, new_write_idx
+
+        _, num_constituents = helper(tree, 0, 0)
+        starts = starts[:num_constituents]
+        ends = ends[:num_constituents]
+        labels = labels[:num_constituents]
+
+        tags = None
+        if tag_vocab is not None:
+            tags = np.array([tag_vocab[tag] for _, tag in tree.pos()], dtype=int)
+
+        return cls(starts=starts, ends=ends, labels=labels, tags=tags)
+
+    def to_tree(self, leaves, label_from_index: dict, tag_from_index: dict = None):
+        """Rebuild an nltk.Tree rooted at "TOP" from the arrays and the given leaves."""
+        if self.tags is not None:
+            if tag_from_index is None:
+                raise ValueError(
+                    "tag_from_index is required to convert predicted pos tags"
+                )
+            predicted_tags = [tag_from_index[i] for i in self.tags]
+            assert len(leaves) == len(predicted_tags)
+            leaves = [
+                nltk.Tree(tag, [leaf[0] if isinstance(leaf, tuple) else leaf])
+                for tag, leaf in zip(predicted_tags, leaves)
+            ]
+        else:
+            leaves = [
+                nltk.Tree(leaf[1], [leaf[0]])
+                if isinstance(leaf, tuple)
+                else (nltk.Tree("UNK", [leaf]) if isinstance(leaf, str) else leaf)
+                for leaf in leaves
+            ]
+
+        idx = -1
+
+        def helper():
+            # Consumes the entry at the shared cursor `idx` and every following
+            # entry nested inside it; returns the resulting list of nodes.
+            nonlocal idx
+            idx += 1
+            i, j = self.starts[idx], self.ends[idx]
+            label = label_from_index[self.labels[idx]]
+            if (i + 1) >= j:
+                children = [leaves[i]]
+            else:
+                children = []
+                while (
+                    (idx + 1) < len(self.starts)
+                    and i <= self.starts[idx + 1]
+                    and self.ends[idx + 1] <= j
+                ):
+                    children.extend(helper())
+
+            if label:
+                for sublabel in reversed(label.split("::")):
+                    children = [nltk.Tree(sublabel, children)]
+
+            return children
+
+        children = helper()
+        return nltk.Tree("TOP", children)
+
+
+class BaseParser(ABC):
+    """Abstract interface that parser implementations must satisfy.
+
+    Every method below is abstract, so a concrete parser has to override all
+    of them.
+    """
+
+    @classmethod
+    @abstractmethod
+    def from_trained(
+        cls, model_name: str, config: dict = None, state_dict: dict = None
+    ) -> "BaseParser":
+        """Load a trained parser."""
+
+    @abstractmethod
+    def parallelize(self, *args, **kwargs):
+        """Spread out pre-trained model layers across GPUs."""
+
+    @abstractmethod
+    def parse(
+        self,
+        examples: Iterable[BaseInputExample],
+        return_compressed: bool = False,
+        return_scores: bool = False,
+        subbatch_max_tokens: Optional[int] = None,
+    ) -> Union[Iterable[nltk.Tree], Iterable[Any]]:
+        """Parse sentences."""
+
+    @abstractmethod
+    def encode_and_collate_subbatches(
+        self, examples: List[BaseInputExample], subbatch_max_tokens: int
+    ) -> List[dict]:
+        """Split batch into sub-batches and convert to tensor features"""
+
+    @abstractmethod
+    def compute_loss(self, batch: dict):
+        """Compute the loss for one batch."""
diff --git a/parsing/src/benepar/parse_chart.py b/parsing/src/benepar/parse_chart.py
new file mode 100644
index 0000000000000000000000000000000000000000..3cf8314885a8b77f01dd71d0636c34eb85d7f5ae
--- /dev/null
+++ b/parsing/src/benepar/parse_chart.py
@@ -0,0 +1,434 @@
+import os
+
+import numpy as np
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from transformers import AutoConfig, AutoModel
+
+from . import char_lstm
+from . import decode_chart
+from . import nkutil
+from .partitioned_transformer import (
+    ConcatPositionalEncoding,
+    FeatureDropout,
+    PartitionedTransformerEncoder,
+    PartitionedTransformerEncoderLayer,
+)
+from . import parse_base
+from . import retokenization
+from . import subbatching
+
+
+class ChartParser(nn.Module, parse_base.BaseParser):
+    def __init__(
+        self,
+        tag_vocab,
+        label_vocab,
+        char_vocab,
+        hparams,
+        pretrained_model_path=None,
+    ):
+        super().__init__()
+        self.config = locals()  # constructor arguments by name; taken before any other local is bound
+        self.config.pop("self")
+        self.config.pop("__class__")
+        self.config.pop("pretrained_model_path")  # the path is not kept among the stored arguments
+        self.config["hparams"] = hparams.to_dict()  # store hparams as a plain dict
+
+        self.tag_vocab = tag_vocab
+        self.label_vocab = label_vocab
+        self.char_vocab = char_vocab
+
+        self.d_model = hparams.d_model
+
+        self.char_encoder = None
+        self.pretrained_model = None
+        if hparams.use_chars_lstm:
+            assert (
+                not hparams.use_pretrained
+            ), "use_chars_lstm and use_pretrained are mutually exclusive"
+            self.retokenizer = char_lstm.RetokenizerForCharLSTM(self.char_vocab)
+            self.char_encoder = char_lstm.CharacterLSTM(
+                max(self.char_vocab.values()) + 1,
+                hparams.d_char_emb,
+                hparams.d_model // 2,  # Half-size to leave room for
+                # partitioned positional encoding
+                char_dropout=hparams.char_lstm_input_dropout,
+            )
+        elif hparams.use_pretrained:
+            if pretrained_model_path is None:
+                self.retokenizer = retokenization.Retokenizer(
+                    hparams.pretrained_model, retain_start_stop=True
+                )
+                self.pretrained_model = AutoModel.from_pretrained(
+                    hparams.pretrained_model
+                )
+            else:  # a local path was given: the model is built from its config via from_config
+                self.retokenizer = retokenization.Retokenizer(
+                    pretrained_model_path, retain_start_stop=True
+                )
+                self.pretrained_model = AutoModel.from_config(
+                    AutoConfig.from_pretrained(pretrained_model_path)
+                )
+            d_pretrained = self.pretrained_model.config.hidden_size
+
+            if hparams.use_encoder:
+                self.project_pretrained = nn.Linear(
+                    d_pretrained, hparams.d_model // 2, bias=False
+                )
+            else:
+                self.project_pretrained = nn.Linear(
+                    d_pretrained, hparams.d_model, bias=False
+                )
+
+        if hparams.use_encoder:
+            self.morpho_emb_dropout = FeatureDropout(hparams.morpho_emb_dropout)
+            self.add_timing = ConcatPositionalEncoding(
+                d_model=hparams.d_model,
+                max_len=hparams.encoder_max_len,
+            )
+            encoder_layer = PartitionedTransformerEncoderLayer(
+                hparams.d_model,
+                n_head=hparams.num_heads,
+                d_qkv=hparams.d_kv,
+                d_ff=hparams.d_ff,
+                ff_dropout=hparams.relu_dropout,
+                residual_dropout=hparams.residual_dropout,
+                attention_dropout=hparams.attention_dropout,
+            )
+            self.encoder = PartitionedTransformerEncoder(
+                encoder_layer, hparams.num_layers
+            )
+        else:
+            self.morpho_emb_dropout = None
+            self.add_timing = None
+            self.encoder = None
+
+        self.f_label = nn.Sequential(
+            nn.Linear(hparams.d_model, hparams.d_label_hidden),
+            nn.LayerNorm(hparams.d_label_hidden),
+            nn.ReLU(),
+            nn.Linear(hparams.d_label_hidden, max(label_vocab.values())),  # NOTE(review): no "+ 1" here, unlike f_tag below; presumably intentional, confirm
+        )
+
+        if hparams.predict_tags:
+            self.f_tag = nn.Sequential(
+                nn.Linear(hparams.d_model, hparams.d_tag_hidden),
+                nn.LayerNorm(hparams.d_tag_hidden),
+                nn.ReLU(),
+                nn.Linear(hparams.d_tag_hidden, max(tag_vocab.values()) + 1),
+            )
+            self.tag_loss_scale = hparams.tag_loss_scale
+            self.tag_from_index = {i: label for label, i in tag_vocab.items()}  # inverse of tag_vocab
+        else:
+            self.f_tag = None
+            self.tag_from_index = None
+
+        self.decoder = decode_chart.ChartDecoder(
+            label_vocab=self.label_vocab,
+            force_root_constituent=hparams.force_root_constituent,
+        )
+        self.criterion = decode_chart.SpanClassificationMarginLoss(
+            reduction="sum", force_root_constituent=hparams.force_root_constituent
+        )
+
+        self.parallelized_devices = None  # replaced by a pair of devices in parallelize()
+
+    @property
+    def device(self):
+        # First entry of parallelized_devices when set, else where f_label's weights live.
+        if self.parallelized_devices is None:
+            return next(self.f_label.parameters()).device
+        return self.parallelized_devices[0]
+
+    @property
+    def output_device(self):
+        # Second entry of parallelized_devices when set, else where f_label's weights live.
+        if self.parallelized_devices is None:
+            return next(self.f_label.parameters()).device
+        return self.parallelized_devices[1]
+
+    def parallelize(self, *args, **kwargs):
+        self.parallelized_devices = (torch.device("cuda", 0), torch.device("cuda", 1))
+        others = [m for m in self.children() if m != self.pretrained_model]
+        for module in others:
+            module.to(self.output_device)  # i.e. the second device of the pair set above
+        self.pretrained_model.parallelize(*args, **kwargs)
+
+    @classmethod
+    def from_trained(cls, model_path):
+        """Load a parser from an exported directory or a single checkpoint file."""
+        # NOTE(review): depending on the PyTorch version / weights_only setting,
+        # torch.load may unpickle arbitrary objects; only load trusted files.
+        if os.path.isdir(model_path):
+            # Multi-file format used when exporting models for release.
+            # Unlike the checkpoints saved during training, these files include
+            # all tokenizer parameters and a copy of the pre-trained model
+            # config (rather than downloading these on-demand).
+            weights_path = os.path.join(model_path, "benepar_model.bin")
+            config = AutoConfig.from_pretrained(model_path).benepar
+            state_dict = torch.load(weights_path, map_location="cpu")
+            config["pretrained_model_path"] = model_path
+        else:
+            # Single-file format used for saving checkpoints during training.
+            data = torch.load(model_path, map_location="cpu")
+            config, state_dict = data["config"], data["state_dict"]
+
+        hparams = config["hparams"]
+        if "force_root_constituent" not in hparams:
+            hparams["force_root_constituent"] = True  # default when the key is absent
+
+        config["hparams"] = nkutil.HParams(**hparams)
+        parser = cls(**config)
+        parser.load_state_dict(state_dict)
+        return parser
+
+    def encode(self, example):
+        """Retokenize one example; label tensors are added when it has a tree."""
+        if self.char_encoder is not None:
+            encoded = self.retokenizer(example.words, return_tensors="np")
+        else:
+            encoded = self.retokenizer(example.words, example.space_after)
+
+        if example.tree is None:
+            return encoded
+        chart = self.decoder.chart_from_tree(example.tree)
+        encoded["span_labels"] = torch.tensor(chart)
+        if self.f_tag is not None:
+            tag_ids = [self.tag_vocab[tag] for _, tag in example.pos()]
+            encoded["tag_labels"] = torch.tensor([-100] + tag_ids + [-100])
+        return encoded
+
+    def pad_encoded(self, encoded_batch):
+        """Pad a list of encoded examples into a single batch."""
+        label_keys = ("span_labels", "tag_labels")
+        # The retokenizer pads everything except the label entries.
+        features = [
+            {k: v for k, v in example.items() if k not in label_keys}
+            for example in encoded_batch
+        ]
+        batch = self.retokenizer.pad(features, return_tensors="pt")
+
+        # Labels are attached only when the first example carries them.
+        if encoded_batch and "span_labels" in encoded_batch[0]:
+            batch["span_labels"] = decode_chart.pad_charts(
+                [example["span_labels"] for example in encoded_batch]
+            )
+        if encoded_batch and "tag_labels" in encoded_batch[0]:
+            batch["tag_labels"] = nn.utils.rnn.pad_sequence(
+                [example["tag_labels"] for example in encoded_batch],
+                batch_first=True,
+                padding_value=-100,
+            )
+
+        return batch
+
+    def _get_lens(self, encoded_batch):
+        # Length of "input_ids" with a pretrained model, else of "valid_token_mask".
+        key = "input_ids" if self.pretrained_model is not None else "valid_token_mask"
+        return [len(encoded[key]) for encoded in encoded_batch]
+
+    def encode_and_collate_subbatches(self, examples, subbatch_max_tokens):
+        """Encode `examples`, then split and pad them into (size, subbatch) pairs."""
+        batch_size = len(examples)
+        batch_num_tokens = sum(len(x.words) for x in examples)
+        encoded = [self.encode(example) for example in examples]
+        costs = self._get_lens(encoded)
+        groups = subbatching.split(encoded, costs=costs, max_cost=subbatch_max_tokens)
+        res = []
+        for ids, chunk in groups:
+            subbatch = self.pad_encoded(chunk)
+            subbatch["batch_size"] = batch_size  # totals of the full batch, not the chunk
+            subbatch["batch_num_tokens"] = batch_num_tokens
+            res.append((len(ids), subbatch))
+        return res
+
+    def forward(self, batch):
+        """Score every span (and, with a tag head, every tag) for a padded batch.
+
+        Args:
+            batch: mapping holding ``valid_token_mask`` plus either ``char_ids``
+                (character-encoder path) or ``input_ids``, ``words_from_tokens``
+                and ``attention_mask`` (pre-trained-model path). Optional
+                ``token_type_ids`` and ``decoder_input_ids`` /
+                ``decoder_attention_mask`` entries are passed on to the
+                pre-trained model when present.
+
+        Returns:
+            ``(span_scores, tag_scores)``. ``tag_scores`` is ``None`` when
+            ``self.f_tag`` is ``None``.
+
+        Raises:
+            ValueError: if ``self.encoder`` is set and the padded sequence is
+                longer than the number of rows in the positional timing table.
+        """
+        valid_token_mask = batch["valid_token_mask"].to(self.output_device)
+
+        # The timing table has one row per position, so it bounds the length
+        # the encoder can handle.
+        if (
+            self.encoder is not None
+            and valid_token_mask.shape[1] > self.add_timing.timing_table.shape[0]
+        ):
+            raise ValueError(
+                "Sentence of length {} exceeds the maximum supported length of "
+                "{}".format(
+                    valid_token_mask.shape[1] - 2,
+                    self.add_timing.timing_table.shape[0] - 2,
+                )
+            )
+
+        if self.char_encoder is not None:
+            assert isinstance(self.char_encoder, char_lstm.CharacterLSTM)
+            char_ids = batch["char_ids"].to(self.device)
+            extra_content_annotations = self.char_encoder(char_ids, valid_token_mask)
+        elif self.pretrained_model is not None:
+            input_ids = batch["input_ids"].to(self.device)
+            words_from_tokens = batch["words_from_tokens"].to(self.output_device)
+            pretrained_attention_mask = batch["attention_mask"].to(self.device)
+
+            # Forward only the optional inputs that the batch actually contains.
+            extra_kwargs = {}
+            if "token_type_ids" in batch:
+                extra_kwargs["token_type_ids"] = batch["token_type_ids"].to(self.device)
+            if "decoder_input_ids" in batch:
+                extra_kwargs["decoder_input_ids"] = batch["decoder_input_ids"].to(
+                    self.device
+                )
+                extra_kwargs["decoder_attention_mask"] = batch[
+                    "decoder_attention_mask"
+                ].to(self.device)
+
+            pretrained_out = self.pretrained_model(
+                input_ids, attention_mask=pretrained_attention_mask, **extra_kwargs
+            )
+            features = pretrained_out.last_hidden_state.to(self.output_device)
+            # Pick, for every word position, the feature vector of the subword
+            # index stored in words_from_tokens. F.relu clamps the -100
+            # placeholders to index 0 so the gather stays in range; those
+            # positions are zeroed right after.
+            features = features[
+                torch.arange(features.shape[0])[:, None],
+                # Note that words_from_tokens uses index -100 for invalid positions
+                F.relu(words_from_tokens),
+            ]
+            features.masked_fill_(~valid_token_mask[:, :, None], 0)
+            if self.encoder is not None:
+                extra_content_annotations = self.project_pretrained(features)
+
+        # NOTE(review): if self.encoder is set but neither self.char_encoder nor
+        # self.pretrained_model is, extra_content_annotations is never assigned
+        # and the next branch raises a NameError-style error -- confirm that the
+        # constructor rules this configuration out.
+        if self.encoder is not None:
+            encoder_in = self.add_timing(
+                self.morpho_emb_dropout(extra_content_annotations)
+            )
+
+            annotations = self.encoder(encoder_in, valid_token_mask)
+            # Rearrange the annotations to ensure that the transition to
+            # fenceposts captures an even split between position and content.
+
+            # Even-indexed features first, then odd-indexed features.
+            annotations = torch.cat(
+                [
+                    annotations[..., 0::2],
+                    annotations[..., 1::2],
+                ],
+                -1,
+            )
+        else:
+            # NOTE(review): `features` only exists on the pre-trained-model
+            # path above; with a char encoder and no self.encoder it would be
+            # unbound here -- confirm this combination cannot occur.
+            assert self.pretrained_model is not None
+            annotations = self.project_pretrained(features)
+
+        if self.f_tag is not None:
+            tag_scores = self.f_tag(annotations)
+        else:
+            tag_scores = None
+
+        # Each fencepost pairs the first half of the features at position t
+        # with the second half of the features at position t + 1.
+        fencepost_annotations = torch.cat(
+            [
+                annotations[:, :-1, : self.d_model // 2],
+                annotations[:, 1:, self.d_model // 2 :],
+            ],
+            -1,
+        )
+
+        # Note that the bias added to the final layer norm is useless because
+        # this subtraction gets rid of it
+        # Pairwise differences between fenceposts; the slice drops the last
+        # row index and the first column index of the difference table.
+        span_features = (
+            torch.unsqueeze(fencepost_annotations, 1)
+            - torch.unsqueeze(fencepost_annotations, 2)
+        )[:, :-1, 1:]
+        span_scores = self.f_label(span_features)
+        # Prepend a constant zero score as label index 0 on the last axis.
+        span_scores = torch.cat(
+            [span_scores.new_zeros(span_scores.shape[:-1] + (1,)), span_scores], -1
+        )
+        return span_scores, tag_scores
+
+    def compute_loss(self, batch):
+        """Return the training loss for one (sub-)batch.
+
+        The span loss from ``self.criterion`` is divided by the full batch
+        size. When the model produces tag scores, a summed cross-entropy over
+        tags (label ``-100`` ignored), scaled by ``self.tag_loss_scale`` and
+        divided by the full batch's token count, is added.
+        """
+        span_scores, tag_scores = self.forward(batch)
+        gold_spans = batch["span_labels"].to(span_scores.device)
+        # Divide by the total batch size, not by the subbatch size
+        span_loss = self.criterion(span_scores, gold_spans) / batch["batch_size"]
+        if tag_scores is None:
+            return span_loss
+
+        gold_tags = batch["tag_labels"].to(tag_scores.device)
+        num_tag_classes = tag_scores.shape[-1]
+        summed_tag_loss = F.cross_entropy(
+            tag_scores.reshape((-1, num_tag_classes)),
+            gold_tags.reshape((-1,)),
+            reduction="sum",
+            ignore_index=-100,
+        )
+        tag_loss = self.tag_loss_scale * summed_tag_loss / batch["batch_num_tokens"]
+        return span_loss + tag_loss
+
+    def _parse_encoded(
+        self, examples, encoded, return_compressed=False, return_scores=False
+    ):
+        """Run the model on already-encoded examples and yield one result each.
+
+        Depending on the flags, each yielded item is the example's raw span
+        score array (``return_scores``), a compressed decoder output
+        (``return_compressed``), or a tree built by the decoder. When the
+        model predicts tags they replace the tags from ``example.pos()``.
+        """
+        with torch.no_grad():
+            batch = self.pad_encoded(encoded)
+            span_scores, tag_scores = self.forward(batch)
+            if return_scores:
+                span_scores_np = span_scores.cpu().numpy()
+            else:
+                # Start/stop tokens don't count, so subtract 2
+                lengths = batch["valid_token_mask"].sum(-1) - 2
+                charts_np = self.decoder.charts_from_pytorch_scores_batched(
+                    span_scores, lengths.to(span_scores.device)
+                )
+            tag_ids_np = (
+                None if tag_scores is None else tag_scores.argmax(-1).cpu().numpy()
+            )
+
+        for idx in range(len(encoded)):
+            example = examples[idx]
+            num_words = len(example.words)
+
+            if return_scores:
+                yield span_scores_np[idx, :num_words, :num_words]
+                continue
+
+            if return_compressed:
+                output = self.decoder.compressed_output_from_chart(charts_np[idx])
+                if tag_ids_np is not None:
+                    # Position 0 is the start token, so words occupy 1..num_words.
+                    output = output.with_tags(tag_ids_np[idx, 1 : num_words + 1])
+                yield output
+                continue
+
+            if tag_scores is None:
+                leaves = example.pos()
+            else:
+                word_tag_ids = tag_ids_np[idx, 1 : num_words + 1]
+                predicted_tags = [self.tag_from_index[tag_id] for tag_id in word_tag_ids]
+                leaves = [
+                    (word, predicted_tag)
+                    for predicted_tag, (word, _gold_tag) in zip(
+                        predicted_tags, example.pos()
+                    )
+                ]
+            yield self.decoder.tree_from_chart(charts_np[idx], leaves=leaves)
+
+    def parse(
+        self,
+        examples,
+        return_compressed=False,
+        return_scores=False,
+        subbatch_max_tokens=None,
+    ):
+        """Parse ``examples`` in evaluation mode and return the results.
+
+        Args:
+            examples: the examples to encode and parse.
+            return_compressed: passed through to ``_parse_encoded``.
+            return_scores: passed through to ``_parse_encoded``.
+            subbatch_max_tokens: when given, the work is split into
+                sub-batches through ``subbatching.map`` with this cost budget;
+                otherwise all examples are processed as a single batch.
+
+        Returns:
+            The parse results, one per example.
+
+        The module is switched to eval mode for the duration of the call and
+        its previous training flag is restored afterwards, even when encoding
+        or parsing raises.
+        """
+        training = self.training
+        self.eval()
+        try:
+            encoded = [self.encode(example) for example in examples]
+            if subbatch_max_tokens is not None:
+                res = subbatching.map(
+                    self._parse_encoded,
+                    examples,
+                    encoded,
+                    costs=self._get_lens(encoded),
+                    max_cost=subbatch_max_tokens,
+                    return_compressed=return_compressed,
+                    return_scores=return_scores,
+                )
+            else:
+                # _parse_encoded is a generator: drain it while still in eval
+                # mode, before the training flag is restored.
+                res = list(
+                    self._parse_encoded(
+                        examples,
+                        encoded,
+                        return_compressed=return_compressed,
+                        return_scores=return_scores,
+                    )
+                )
+        finally:
+            self.train(training)
+        return res
diff --git a/parsing/src/benepar/partitioned_transformer.py b/parsing/src/benepar/partitioned_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..a078b41b5c26e1ec283794d735e0e0e9bbe29201
--- /dev/null
+++ b/parsing/src/benepar/partitioned_transformer.py
@@ -0,0 +1,206 @@
+"""
+Transformer with partitioned content and position features.
+
+See section 3 of https://arxiv.org/pdf/1805.01052.pdf
+"""
+
+import copy
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class FeatureDropoutFunction(torch.autograd.function.InplaceFunction):
+    """Autograd function behind ``FeatureDropout``.
+
+    One dropout mask of shape ``(input.size(0), 1, input.size(-1))`` is
+    sampled and multiplied into the input, so a dropped feature is dropped at
+    every index of the middle dimension for a given first-dimension entry.
+    """
+
+    @staticmethod
+    def forward(ctx, input, p=0.5, train=False, inplace=False):
+        """Apply the mask when ``train`` is set and ``p > 0``; else pass through."""
+        if p < 0 or p > 1:
+            raise ValueError(
+                f"dropout probability has to be between 0 and 1, but got {p}"
+            )
+
+        ctx.p, ctx.train, ctx.inplace = p, train, inplace
+
+        if ctx.inplace:
+            ctx.mark_dirty(input)
+            output = input
+        else:
+            output = input.clone()
+
+        if not (ctx.p > 0 and ctx.train):
+            return output
+
+        mask = torch.empty(
+            (input.size(0), input.size(-1)),
+            dtype=input.dtype,
+            layout=input.layout,
+            device=input.device,
+        )
+        if ctx.p == 1:
+            mask.fill_(0)
+        else:
+            # Kept features are rescaled by 1 / (1 - p).
+            mask.bernoulli_(1 - ctx.p).div_(1 - ctx.p)
+        # Saved on ctx so that backward reuses exactly the same mask.
+        ctx.noise = mask[:, None, :]
+        output.mul_(ctx.noise)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        """Mask the incoming gradient with the mask used in ``forward``."""
+        masked = ctx.p > 0 and ctx.train
+        grad_input = grad_output.mul(ctx.noise) if masked else grad_output
+        return grad_input, None, None, None
+
+
+class FeatureDropout(nn.Dropout):
+    """
+    Feature-level dropout: takes an input of size len x num_features and drops
+    each feature with probability p. A dropped feature is removed across the
+    whole portion of the input that belongs to a single batch element.
+
+    A ``(content, position)`` tuple is also accepted; dropout is then applied
+    to each half independently and a tuple is returned.
+    """
+
+    def forward(self, x):
+        def drop(tensor):
+            return FeatureDropoutFunction.apply(
+                tensor, self.p, self.training, self.inplace
+            )
+
+        if not isinstance(x, tuple):
+            return drop(x)
+        content, position = x
+        return drop(content), drop(position)
+
+
+class PartitionedReLU(nn.ReLU):
+    """ReLU over a (content, position) pair.
+
+    A tensor input is first split into two halves along its last dimension;
+    the result is always a 2-tuple.
+    """
+
+    def forward(self, x):
+        halves = x if isinstance(x, tuple) else torch.chunk(x, 2, dim=-1)
+        content, position = halves
+        relu = super().forward
+        return relu(content), relu(position)
+
+
+class PartitionedLinear(nn.Module):
+    """Two independent linear maps, one per half of the feature dimension.
+
+    Each half maps ``in_features // 2`` inputs to ``out_features // 2``
+    outputs. The input is either a tensor (split in two along its last
+    dimension) or a ``(content, position)`` tuple; the output is a tuple.
+    """
+
+    def __init__(self, in_features, out_features, bias=True):
+        super().__init__()
+        half_in, half_out = in_features // 2, out_features // 2
+        self.linear_c = nn.Linear(half_in, half_out, bias)
+        self.linear_p = nn.Linear(half_in, half_out, bias)
+
+    def forward(self, x):
+        content, position = x if isinstance(x, tuple) else torch.chunk(x, 2, dim=-1)
+        return self.linear_c(content), self.linear_p(position)
+
+
+class PartitionedMultiHeadAttention(nn.Module):
+    """Multi-head self-attention with separate content and position weights.
+
+    Queries, keys and values are projected separately for the two halves of
+    the input and concatenated before the attention weights are computed. The
+    attended values are split again and projected back per half, so the
+    output is a ``(content, position)`` tuple.
+    """
+
+    def __init__(
+        self, d_model, n_head, d_qkv, attention_dropout=0.1, initializer_range=0.02
+    ):
+        super().__init__()
+
+        half_model, half_qkv = d_model // 2, d_qkv // 2
+        self.w_qkv_c = nn.Parameter(torch.Tensor(n_head, half_model, 3, half_qkv))
+        self.w_qkv_p = nn.Parameter(torch.Tensor(n_head, half_model, 3, half_qkv))
+        self.w_o_c = nn.Parameter(torch.Tensor(n_head, half_qkv, half_model))
+        self.w_o_p = nn.Parameter(torch.Tensor(n_head, half_qkv, half_model))
+
+        # Uniform on [-bound, bound] has standard deviation initializer_range.
+        bound = math.sqrt(3.0) * initializer_range
+        for weight in (self.w_qkv_c, self.w_qkv_p, self.w_o_c, self.w_o_p):
+            nn.init.uniform_(weight, -bound, bound)
+        self.scaling_factor = 1 / d_qkv ** 0.5
+
+        self.dropout = nn.Dropout(attention_dropout)
+
+    def forward(self, x, mask=None):
+        x_c, x_p = x if isinstance(x, tuple) else torch.chunk(x, 2, dim=-1)
+
+        # Axis 3 of the projection indexes (query, key, value).
+        q_c, k_c, v_c = torch.einsum("btf,hfca->bhtca", x_c, self.w_qkv_c).unbind(
+            dim=3
+        )
+        q_p, k_p, v_p = torch.einsum("btf,hfca->bhtca", x_p, self.w_qkv_p).unbind(
+            dim=3
+        )
+
+        q = torch.cat([q_c, q_p], dim=-1) * self.scaling_factor
+        k = torch.cat([k_c, k_p], dim=-1)
+        v = torch.cat([v_c, v_p], dim=-1)
+
+        dots = torch.einsum("bhqa,bhka->bhqk", q, k)
+        if mask is not None:
+            # Keys where the mask is False get -inf, i.e. zero attention weight.
+            dots.data.masked_fill_(~mask[:, None, None, :], -float("inf"))
+        probs = self.dropout(F.softmax(dots, dim=-1))
+
+        attended = torch.einsum("bhqk,bhka->bhqa", probs, v)
+        attended_c, attended_p = torch.chunk(attended, 2, dim=-1)
+        out_c = torch.einsum("bhta,haf->btf", attended_c, self.w_o_c)
+        out_p = torch.einsum("bhta,haf->btf", attended_p, self.w_o_p)
+        return out_c, out_p
+
+
+class PartitionedTransformerEncoderLayer(nn.Module):
+    """One encoder layer: partitioned self-attention, then a feed-forward block.
+
+    Each sub-block's output goes through feature dropout, is added back to
+    its input, and the sum is layer-normalised.
+    """
+
+    def __init__(
+        self,
+        d_model,
+        n_head,
+        d_qkv,
+        d_ff,
+        ff_dropout=0.1,
+        residual_dropout=0.1,
+        attention_dropout=0.1,
+        activation=PartitionedReLU(),
+    ):
+        super().__init__()
+        # Attention sub-block.
+        self.self_attn = PartitionedMultiHeadAttention(
+            d_model, n_head, d_qkv, attention_dropout=attention_dropout
+        )
+        # Feed-forward sub-block: d_model -> d_ff -> d_model.
+        self.linear1 = PartitionedLinear(d_model, d_ff)
+        self.ff_dropout = FeatureDropout(ff_dropout)
+        self.linear2 = PartitionedLinear(d_ff, d_model)
+
+        self.norm_attn = nn.LayerNorm(d_model)
+        self.norm_ff = nn.LayerNorm(d_model)
+        self.residual_dropout_attn = FeatureDropout(residual_dropout)
+        self.residual_dropout_ff = FeatureDropout(residual_dropout)
+
+        self.activation = activation
+
+    def forward(self, x, mask=None):
+        # The partitioned modules return (content, position) tuples; they are
+        # concatenated before being added to the residual stream.
+        attn_out = torch.cat(self.self_attn(x, mask=mask), dim=-1)
+        attn_out = self.residual_dropout_attn(attn_out)
+        x = self.norm_attn(x + attn_out)
+
+        hidden = self.ff_dropout(self.activation(self.linear1(x)))
+        ff_out = torch.cat(self.linear2(hidden), dim=-1)
+        ff_out = self.residual_dropout_ff(ff_out)
+        return self.norm_ff(x + ff_out)
+
+
+class PartitionedTransformerEncoder(nn.Module):
+    """A stack of ``n_layers`` independent deep copies of ``encoder_layer``."""
+
+    def __init__(self, encoder_layer, n_layers):
+        super().__init__()
+        clones = [copy.deepcopy(encoder_layer) for _ in range(n_layers)]
+        self.layers = nn.ModuleList(clones)
+
+    def forward(self, x, mask=None):
+        """Feed ``x`` through the layers in order, giving each the same mask."""
+        hidden = x
+        for encoder_layer in self.layers:
+            hidden = encoder_layer(hidden, mask=mask)
+        return hidden
+
+
+class ConcatPositionalEncoding(nn.Module):
+    """Appends a learned position embedding to the input, then layer-norms.
+
+    The timing table has ``max_len`` rows of ``d_model // 2`` features; the
+    first ``x.shape[1]`` rows are concatenated to ``x`` along the last
+    dimension.
+    """
+
+    def __init__(self, d_model=256, max_len=512):
+        super().__init__()
+        table = torch.FloatTensor(max_len, d_model // 2)
+        self.timing_table = nn.Parameter(table)
+        nn.init.normal_(self.timing_table)
+        self.norm = nn.LayerNorm(d_model)
+
+    def forward(self, x):
+        seq_len = x.shape[1]
+        positions = self.timing_table[None, :seq_len, :]
+        # Broadcasting expands the single table copy across the batch.
+        content, positions = torch.broadcast_tensors(x, positions)
+        return self.norm(torch.cat([content, positions], dim=-1))
diff --git a/parsing/src/benepar/ptb_unescape.py b/parsing/src/benepar/ptb_unescape.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9403d492257003c9145c494314b6e670da61fcc
--- /dev/null
+++ b/parsing/src/benepar/ptb_unescape.py
@@ -0,0 +1,83 @@
+# Whole-token replacements: typographic quotes and dashes mapped to ASCII.
+PTB_UNESCAPE_MAPPING = {
+    **dict.fromkeys("«»", '"'),
+    **dict.fromkeys("‘’", "'"),
+    **dict.fromkeys("“”„", '"'),
+    **dict.fromkeys("‹›", "'"),
+    "\u2013": "--",  # en dash
+    "\u2014": "--",  # em dash
+}
+
+# Token prefixes that suppress the space before the token.
+NO_SPACE_BEFORE = {"-RRB-", "-RCB-", "-RSB-", "''", *"%.,!?:;"}
+# Token suffixes that suppress the space after the token.
+NO_SPACE_AFTER = {"-LRB-", "-LCB-", "-LSB-", "``", "`", *"$#"}
+# English tokens (compared lower-cased) that attach to the previous word.
+NO_SPACE_BEFORE_TOKENS_ENGLISH = set("' 's 'll 're 'd 'm 've".split())
+# Tokens with dashes that are PTB escapes and must not be read as hyphens.
+PTB_DASH_ESCAPED = {"-LRB-", "-LCB-", "-LSB-", "-RRB-", "-RCB-", "-RSB-", "--"}
+
+
+def ptb_unescape(words):
+    """Return a new list with PTB escape sequences in each word undone.
+
+    A word that exactly matches a key of ``PTB_UNESCAPE_MAPPING`` is replaced
+    first; the substring replacements below are then applied in order.
+    """
+    substring_rules = (
+        # This un-escaping for / and * was not yet added for the
+        # parser version in https://arxiv.org/abs/1812.11760v1
+        # and related model releases (e.g. benepar_en2)
+        ("\\/", "/"),
+        ("\\*", "*"),
+        # Mid-token punctuation occurs in biomedical text
+        ("-LSB-", "["),
+        ("-RSB-", "]"),
+        ("-LRB-", "("),
+        ("-RRB-", ")"),
+        ("-LCB-", "{"),
+        ("-RCB-", "}"),
+        # Order matters: double backticks must be handled before single ones.
+        ("``", '"'),
+        ("`", "'"),
+        ("''", '"'),
+    )
+
+    cleaned_words = []
+    for raw in words:
+        cleaned = PTB_UNESCAPE_MAPPING.get(raw, raw)
+        for escaped, plain in substring_rules:
+            cleaned = cleaned.replace(escaped, plain)
+        cleaned_words.append(cleaned)
+    return cleaned_words
+
+
+def guess_space_after_non_english(escaped_words):
+    """Guess, for each PTB-escaped token, whether whitespace follows it.
+
+    Returns a list of booleans as long as ``escaped_words``. A token loses
+    the space before it when it starts with a hyphen (not a PTB dash escape),
+    starts with an entry of ``NO_SPACE_BEFORE``, or is a lone apostrophe. It
+    loses the space after it when it ends with a hyphen (not a PTB dash
+    escape) or with an entry of ``NO_SPACE_AFTER``.
+    """
+    dash_escapes = tuple(PTB_DASH_ESCAPED)
+    attach_left = tuple(NO_SPACE_BEFORE)
+    attach_right = tuple(NO_SPACE_AFTER)
+
+    sp_after = [True for _ in escaped_words]
+    for i, word in enumerate(escaped_words):
+        leading_hyphen = word.startswith("-") and not word.startswith(dash_escapes)
+        if i > 0 and (
+            leading_hyphen or word.startswith(attach_left) or word == "'"
+        ):
+            sp_after[i - 1] = False
+
+        trailing_hyphen = word.endswith("-") and not word.endswith(dash_escapes)
+        if trailing_hyphen or word.endswith(attach_right):
+            sp_after[i] = False
+
+    return sp_after
+
+
+def guess_space_after(escaped_words, for_english=True):
+    """Guess, for each PTB-escaped token, whether whitespace follows it.
+
+    With ``for_english=False`` the work is delegated to
+    ``guess_space_after_non_english``. The English variant applies the same
+    hyphen and punctuation rules and additionally attaches ``n't``, ``not``
+    directly after ``can``, and the tokens in
+    ``NO_SPACE_BEFORE_TOKENS_ENGLISH`` to the preceding word.
+    """
+    if not for_english:
+        return guess_space_after_non_english(escaped_words)
+
+    dash_escapes = tuple(PTB_DASH_ESCAPED)
+    attach_left = tuple(NO_SPACE_BEFORE)
+    attach_right = tuple(NO_SPACE_AFTER)
+
+    sp_after = [True for _ in escaped_words]
+    for i, word in enumerate(escaped_words):
+        lowered = word.lower()
+        has_previous = i > 0
+
+        # Contractions: "do" + "n't", and "can" + "not" -> "cannot".
+        if has_previous and (
+            lowered == "n't"
+            or (lowered == "not" and escaped_words[i - 1].lower() == "can")
+        ):
+            sp_after[i - 1] = False
+
+        leading_hyphen = word.startswith("-") and not word.startswith(dash_escapes)
+        if has_previous and (
+            leading_hyphen
+            or word.startswith(attach_left)
+            or lowered in NO_SPACE_BEFORE_TOKENS_ENGLISH
+        ):
+            sp_after[i - 1] = False
+
+        trailing_hyphen = word.endswith("-") and not word.endswith(dash_escapes)
+        if trailing_hyphen or word.endswith(attach_right):
+            sp_after[i] = False
+
+    return sp_after
diff --git a/parsing/src/benepar/retokenization.py b/parsing/src/benepar/retokenization.py
new file mode 100644
index 0000000000000000000000000000000000000000..42f77188c5faf721f0587aeeac6da302ac3d8be3
--- /dev/null
+++ b/parsing/src/benepar/retokenization.py
@@ -0,0 +1,258 @@
+"""
+Converts from linguistically motivated word-based tokenization to subword
+tokenization used by pre-trained models.
+"""
+
+import numpy as np
+import torch
+import transformers
+
+
+def retokenize(
+    tokenizer,
+    words,
+    space_after,
+    return_attention_mask=True,
+    return_offsets_mapping=False,
+    return_tensors=None,
+    **kwargs
+):
+    """Re-tokenize into subwords.
+
+    Args:
+        tokenizer: An instance of transformers.PreTrainedTokenizerFast
+        words: List of words
+        space_after: A list of the same length as `words`, indicating whether
+            whitespace follows each word.
+        return_attention_mask: passed on to tokenizer.__call__
+        return_offsets_mapping: whether to keep the `offset_mapping` entry in
+            the returned dictionary. Offsets are always requested from the
+            tokenizer because the alignment needs them; they are removed from
+            the result when this is False.
+        return_tensors: passed on to tokenizer.__call__; also selects the type
+            of `words_from_tokens` ("np" -> numpy array, "pt" -> torch tensor,
+            None -> list). "tf" is not supported.
+        **kwargs: all remaining arguments are passed on to tokenizer.__call__
+
+    Returns:
+        The output of tokenizer.__call__, with one additional dictionary field:
+        - **words_from_tokens** -- List of the same length as `words`, where
+          each entry is the index of the *last* subword that overlaps the
+          corresponding word.
+
+    Raises:
+        NotImplementedError: if `return_tensors` is "tf".
+    """
+    # Rebuild the sentence string and record each word's character span in it.
+    s = "".join([w + (" " if sp else "") for w, sp in zip(words, space_after)])
+    word_offset_starts = np.cumsum(
+        [0] + [len(w) + (1 if sp else 0) for w, sp in zip(words, space_after)]
+    )[:-1]
+    word_offset_ends = word_offset_starts + np.asarray([len(w) for w in words])
+
+    tokenized = tokenizer(
+        s,
+        return_attention_mask=return_attention_mask,
+        return_offsets_mapping=True,
+        return_tensors=return_tensors,
+        **kwargs
+    )
+    if return_offsets_mapping:
+        token_offset_mapping = tokenized["offset_mapping"]
+    else:
+        token_offset_mapping = tokenized.pop("offset_mapping")
+    if return_tensors is not None:
+        # Tensor outputs carry a leading batch axis; take the only entry.
+        token_offset_mapping = np.asarray(token_offset_mapping)[0].tolist()
+
+    # Only tokens with a non-empty character span take part in the alignment
+    # (presumably this skips special tokens -- confirm against the tokenizer).
+    offset_mapping_iter = iter(
+        [
+            (i, (start, end))
+            for (i, (start, end)) in enumerate(token_offset_mapping)
+            if start != end
+        ]
+    )
+    # NOTE(review): this first next() raises StopIteration when no token has a
+    # non-empty span (e.g. for an empty `words` list) -- confirm callers never
+    # pass such input.
+    token_idx, (token_start, token_end) = next(offset_mapping_iter)
+    words_from_tokens = [-100] * len(words)
+    for word_idx, (word_start, word_end) in enumerate(
+        zip(word_offset_starts, word_offset_ends)
+    ):
+        # Skip tokens that end at or before the start of this word.
+        while token_end <= word_start:
+            token_idx, (token_start, token_end) = next(offset_mapping_iter)
+        # The current token runs past the end of the word: use it as is.
+        if token_end > word_end:
+            words_from_tokens[word_idx] = token_idx
+        # Consume every token that ends inside the word; the last one wins.
+        while token_end <= word_end:
+            words_from_tokens[word_idx] = token_idx
+            try:
+                token_idx, (token_start, token_end) = next(offset_mapping_iter)
+            except StopIteration:
+                # Running out of tokens is only expected on the final word.
+                assert word_idx == len(words) - 1
+                break
+    if return_tensors == "np":
+        words_from_tokens = np.asarray(words_from_tokens, dtype=int)
+    elif return_tensors == "pt":
+        words_from_tokens = torch.tensor(words_from_tokens, dtype=torch.long)
+    elif return_tensors == "tf":
+        raise NotImplementedError("Returning tf tensors is not implemented")
+    tokenized["words_from_tokens"] = words_from_tokens
+    return tokenized
+
+
+class Retokenizer:
+    """Word-level front end for a pre-trained model's fast tokenizer.
+
+    Calling an instance re-tokenizes a list of words into subwords through
+    ``retokenize`` and adds a ``words_from_tokens`` entry that maps each word
+    to a subword index. ``pad`` collates several such examples into a padded
+    batch of PyTorch tensors.
+    """
+
+    def __init__(self, pretrained_model_name_or_path, retain_start_stop=False):
+        """Load the tokenizer and, if requested, locate start/stop positions.
+
+        Args:
+            pretrained_model_name_or_path: forwarded to
+                ``transformers.AutoTokenizer.from_pretrained``.
+            retain_start_stop: when True, ``words_from_tokens`` gets one extra
+                entry at each end pointing at a special token of the model.
+
+        Raises:
+            NotImplementedError: if no fast tokenizer is available, or if the
+                start/stop token positions cannot be inferred.
+        """
+        # NOTE(review): AutoTokenizer.from_pretrained documents `use_fast`;
+        # confirm that `fast=True` has the intended effect.
+        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
+            pretrained_model_name_or_path, fast=True
+        )
+        if not self.tokenizer.is_fast:
+            raise NotImplementedError(
+                "Converting from treebank tokenization to tokenization used by a "
+                "pre-trained model requires a 'fast' tokenizer, which appears to not "
+                "be available for this pre-trained model type."
+            )
+        self.retain_start_stop = retain_start_stop
+        self.is_t5 = "T5Tokenizer" in str(type(self.tokenizer))
+        self.is_gpt2 = "GPT2Tokenizer" in str(type(self.tokenizer))
+
+        if self.is_gpt2:
+            # The provided GPT-2 tokenizer does not specify a padding token by default
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+
+        if self.retain_start_stop:
+            # When retain_start_stop is set, the next layer after the pre-trained model
+            # expects start and stop token embeddings. For BERT these can naturally be
+            # the feature vectors for CLS and SEP, but pre-trained models differ in the
+            # special tokens that they use. This code attempts to find special token
+            # positions for each pre-trained model.
+            # -100 is a placeholder "token" whose position in the output shows
+            # how many special tokens are added before and after the input.
+            dummy_ids = self.tokenizer.build_inputs_with_special_tokens([-100])
+            if self.is_t5:
+                # For T5 we use the output from the decoder, which accepts inputs that
+                # are shifted relative to the encoder.
+                dummy_ids = [self.tokenizer.pad_token_id] + dummy_ids
+            if self.is_gpt2:
+                # For GPT-2, we append an eos token if special tokens are needed
+                dummy_ids = dummy_ids + [self.tokenizer.eos_token_id]
+            try:
+                input_idx = dummy_ids.index(-100)
+            except ValueError:
+                raise NotImplementedError(
+                    "Could not automatically infer how to extract start/stop tokens "
+                    "from this pre-trained model"
+                )
+            num_prefix_tokens = input_idx
+            num_suffix_tokens = len(dummy_ids) - input_idx - 1
+            # Non-negative indices count from the front of the sequence,
+            # negative ones from its end (resolved per example in __call__).
+            self.start_token_idx = None
+            self.stop_token_idx = None
+            if num_prefix_tokens > 0:
+                self.start_token_idx = num_prefix_tokens - 1
+            if num_suffix_tokens > 0:
+                self.stop_token_idx = -num_suffix_tokens
+            # With special tokens on one side only, that side serves both roles.
+            if self.start_token_idx is None and num_suffix_tokens > 0:
+                self.start_token_idx = -1
+            if self.stop_token_idx is None and num_prefix_tokens > 0:
+                self.stop_token_idx = 0
+            if self.start_token_idx is None or self.stop_token_idx is None:
+                assert num_prefix_tokens == 0 and num_suffix_tokens == 0
+                raise NotImplementedError(
+                    "Could not automatically infer how to extract start/stop tokens "
+                    "from this pre-trained model because the associated tokenizer "
+                    "appears not to add any special start/stop/cls/sep/etc. tokens "
+                    "to the sequence."
+                )
+
+    def __call__(self, words, space_after, **kwargs):
+        """Re-tokenize one sentence.
+
+        Args:
+            words: list of words.
+            space_after: per-word flags, True where whitespace follows.
+            **kwargs: forwarded to ``retokenize``.
+
+        Returns:
+            The tokenizer output extended with ``words_from_tokens``. With
+            ``retain_start_stop`` that entry is framed by the start and stop
+            token indices.
+
+        Raises:
+            ValueError: with ``retain_start_stop``, if the subword sequence is
+                longer than the tokenizer's ``model_max_length``.
+        """
+        example = retokenize(self.tokenizer, words, space_after, **kwargs)
+        if self.is_t5:
+            # decoder_input_ids (which are shifted wrt input_ids) will be created after
+            # padding, but we adjust words_from_tokens now, in anticipation.
+            if isinstance(example["words_from_tokens"], list):
+                example["words_from_tokens"] = [
+                    x + 1 for x in example["words_from_tokens"]
+                ]
+            else:
+                example["words_from_tokens"] += 1
+        if self.retain_start_stop:
+            input_ids = example["input_ids"]
+            if torch.is_tensor(input_ids):
+                # Tensor outputs may carry a leading batch axis, so the token
+                # count is the size of the last axis, not len().
+                num_tokens = input_ids.shape[-1]
+            else:
+                num_tokens = len(input_ids)
+            if self.is_t5:
+                num_tokens += 1
+            if self.is_gpt2:
+                num_tokens += 1
+                if kwargs.get("return_tensors") == "pt":
+                    # torch.cat expects a sequence of tensors. The appended
+                    # column copies the leading shape of the existing tensor,
+                    # so 1-D and batched 2-D inputs are both handled.
+                    attention_mask = example["attention_mask"]
+                    example["input_ids"] = torch.cat(
+                        [
+                            input_ids,
+                            input_ids.new_full(
+                                input_ids.shape[:-1] + (1,),
+                                self.tokenizer.eos_token_id,
+                            ),
+                        ],
+                        dim=-1,
+                    )
+                    example["attention_mask"] = torch.cat(
+                        [
+                            attention_mask,
+                            attention_mask.new_ones(attention_mask.shape[:-1] + (1,)),
+                        ],
+                        dim=-1,
+                    )
+                else:
+                    example["input_ids"].append(self.tokenizer.eos_token_id)
+                    example["attention_mask"].append(1)
+            if num_tokens > self.tokenizer.model_max_length:
+                raise ValueError(
+                    f"Sentence of length {num_tokens} (in sub-word tokens) exceeds the "
+                    f"maximum supported length of {self.tokenizer.model_max_length}"
+                )
+            # Turn end-relative (negative) indices into absolute positions.
+            start_token_idx = (
+                self.start_token_idx
+                if self.start_token_idx >= 0
+                else num_tokens + self.start_token_idx
+            )
+            stop_token_idx = (
+                self.stop_token_idx
+                if self.stop_token_idx >= 0
+                else num_tokens + self.stop_token_idx
+            )
+            if kwargs.get("return_tensors") == "pt":
+                example["words_from_tokens"] = torch.cat(
+                    [
+                        torch.tensor([start_token_idx]),
+                        example["words_from_tokens"],
+                        torch.tensor([stop_token_idx]),
+                    ]
+                )
+            else:
+                example["words_from_tokens"] = (
+                    [start_token_idx] + example["words_from_tokens"] + [stop_token_idx]
+                )
+        return example
+
+    def pad(self, encoded_inputs, return_tensors=None, **kwargs):
+        """Collate examples produced by ``__call__`` into a padded batch.
+
+        Args:
+            encoded_inputs: list of per-example dictionaries.
+            return_tensors: must be ``"pt"``.
+            **kwargs: forwarded to the tokenizer's ``pad`` method.
+
+        Returns:
+            The padded batch with ``words_from_tokens`` (padded with -100) and
+            ``valid_token_mask`` (True where ``words_from_tokens`` is not
+            -100). For T5, ``decoder_input_ids`` and ``decoder_attention_mask``
+            are added as well.
+
+        Raises:
+            NotImplementedError: if ``return_tensors`` is not ``"pt"``.
+        """
+        if return_tensors != "pt":
+            raise NotImplementedError("Only return_tensors='pt' is supported.")
+        # words_from_tokens is padded separately below, with -100.
+        res = self.tokenizer.pad(
+            [
+                {k: v for k, v in example.items() if k != "words_from_tokens"}
+                for example in encoded_inputs
+            ],
+            return_tensors=return_tensors,
+            **kwargs
+        )
+        if self.tokenizer.padding_side == "right":
+            res["words_from_tokens"] = torch.nn.utils.rnn.pad_sequence(
+                [
+                    torch.tensor(example["words_from_tokens"])
+                    for example in encoded_inputs
+                ],
+                batch_first=True,
+                padding_value=-100,
+            )
+        else:
+            # XLNet adds padding tokens on the left of the sequence, so
+            # words_from_tokens must be adjusted to skip the added padding tokens.
+            assert self.tokenizer.padding_side == "left"
+            res["words_from_tokens"] = torch.nn.utils.rnn.pad_sequence(
+                [
+                    torch.tensor(example["words_from_tokens"])
+                    + (res["input_ids"].shape[-1] - len(example["input_ids"]))
+                    for example in encoded_inputs
+                ],
+                batch_first=True,
+                padding_value=-100,
+            )
+
+        if self.is_t5:
+            # Decoder inputs are the encoder inputs shifted right by one pad
+            # token; the attention mask gets a matching leading column of ones.
+            res["decoder_input_ids"] = torch.cat(
+                [
+                    torch.full_like(
+                        res["input_ids"][:, :1], self.tokenizer.pad_token_id
+                    ),
+                    res["input_ids"],
+                ],
+                1,
+            )
+            res["decoder_attention_mask"] = torch.cat(
+                [
+                    torch.ones_like(res["attention_mask"][:, :1]),
+                    res["attention_mask"],
+                ],
+                1,
+            )
+        res["valid_token_mask"] = res["words_from_tokens"] != -100
+        return res
diff --git a/parsing/src/benepar/spacy_plugin.py b/parsing/src/benepar/spacy_plugin.py
new file mode 100644
index 0000000000000000000000000000000000000000..3923cc46064c5f098a2276a4312c09bc65b8891a
--- /dev/null
+++ b/parsing/src/benepar/spacy_plugin.py
@@ -0,0 +1,13 @@
+__all__ = ["BeneparComponent", "NonConstituentException"]
+
+import warnings
+
+from .integrations.spacy_plugin import BeneparComponent, NonConstituentException
+
+# This module only re-exports names from their new location; tell importers
+# to switch as soon as the module is loaded.
+warnings.warn(
+    message=(
+        "BeneparComponent and NonConstituentException have been moved to the benepar "
+        "module. Use `from benepar import BeneparComponent, NonConstituentException` "
+        "instead of benepar.spacy_plugin. The benepar.spacy_plugin namespace is deprecated "
+        "and will be removed in a future version."
+    ),
+    category=FutureWarning,
+)
diff --git a/parsing/src/benepar/subbatching.py b/parsing/src/benepar/subbatching.py
new file mode 100644
index 0000000000000000000000000000000000000000..53bed87ce8743034a670e358acc947709c57ee3d
--- /dev/null
+++ b/parsing/src/benepar/subbatching.py
@@ -0,0 +1,62 @@
+"""
+Utilities for splitting batches of examples into smaller sub-batches.
+
+This is useful during training when the batch size is too large to fit on GPU,
+meaning that gradient accumulation across multiple sub-batches must be used.
+It is also useful for batching examples during evaluation. Unlike a naive
+approach, this code groups examples with similar lengths to reduce the amount
+of wasted computation due to padding. 
+"""
+
+import numpy as np
+
+
+def split(*data, costs, max_cost):
+    """Splits a batch of input items into sub-batches.
+
+    Items are grouped greedily in order of increasing cost. A sub-batch is
+    charged (number of items) * (largest item cost), which is kept <= max_cost;
+    an item whose own cost exceeds max_cost is yielded in a sub-batch by itself.
+
+    Args:
+        *data: One or more lists of input items, all of the same length
+        costs: A list of costs for each item
+        max_cost: Maximum total cost for each sub-batch
+
+    Yields:
+        (example_ids, *subbatch_data) tuples.
+    """
+    costs = np.asarray(costs, dtype=int)
+    order = np.argsort(costs).tolist()
+    start = 0
+    while start < len(order):
+        stop = start + 1
+        # Grow only while the charge *including* the next item still fits.
+        while stop < len(order) and (stop - start + 1) * costs[order[stop]] <= max_cost:
+            stop += 1
+        item_ids = order[start:stop]
+        yield (item_ids,) + tuple([items[i] for i in item_ids] for items in data)
+        start = stop
+
+
+def map(func, *data, costs, max_cost, **common_kwargs):
+    """Applies func to cost-bounded sub-batches and restores the input order.
+
+    Args:
+        func: Function to map over the data; it is called once per sub-batch
+            as func(*subbatch_data, **common_kwargs)
+        *data: One or more lists of input items, all of the same length.
+        costs: A list of costs for each item
+        max_cost: Maximum total cost for each sub-batch
+        **common_kwargs: Keyword arguments to pass to all calls of func
+
+    Returns:
+        A list with one slot per input item, holding the outputs of func
+        rearranged into the original item order.
+    """
+    outputs = [None] * len(data[0])
+    for subbatch in split(*data, costs=costs, max_cost=max_cost):
+        positions, columns = subbatch[0], subbatch[1:]
+        for position, value in zip(positions, func(*columns, **common_kwargs)):
+            outputs[position] = value
+    return outputs
diff --git a/parsing/src/evaluate.py b/parsing/src/evaluate.py
new file mode 100644
index 0000000000000000000000000000000000000000..198f3f11731299cbb83a5ec02c3e644d4cb1a36f
--- /dev/null
+++ b/parsing/src/evaluate.py
@@ -0,0 +1,128 @@
+import math
+import os.path
+import re
+import subprocess
+import tempfile
+
+import nltk
+
+
+class FScore(object):
+    """Holds EVALB scores; str() renders them with two decimal places."""
+
+    def __init__(self, recall, precision, fscore, complete_match, tagging_accuracy=100):
+        self.recall, self.precision = recall, precision
+        self.fscore, self.complete_match = fscore, complete_match
+        # With the default of 100, __str__ leaves the tagging accuracy out.
+        self.tagging_accuracy = tagging_accuracy
+
+    def __str__(self):
+        """Formats the scores; TaggingAccuracy is shown only when below 100."""
+        parts = [
+            f"Recall={self.recall:.2f}",
+            f"Precision={self.precision:.2f}",
+            f"FScore={self.fscore:.2f}",
+            f"CompleteMatch={self.complete_match:.2f}",
+        ]
+        if self.tagging_accuracy < 100:
+            parts.append(f"TaggingAccuracy={self.tagging_accuracy:.2f}")
+        return "(" + ", ".join(parts) + ")"
+
+
+def evalb(evalb_dir, gold_trees, predicted_trees, ref_gold_path=None):
+    """Scores predicted_trees against gold_trees by running the EVALB program.
+
+    Args:
+        evalb_dir: Directory with "evalb" + "nk.prm" or "evalb_spmrl" + "spmrl.prm"
+        gold_trees: List of nltk.Tree objects
+        predicted_trees: List of nltk.Tree objects with the same leaves as gold
+        ref_gold_path: If given, this file's contents are used as the gold
+            input in place of the serialized gold_trees
+
+    Returns:
+        An FScore; fields not found in the EVALB output keep their initial
+        values (NaN, or 100 for the tagging accuracy).
+    """
+    import shutil  # local import: only needed for the explicit cleanup below
+
+    assert os.path.exists(evalb_dir)
+    evalb_program_path = os.path.join(evalb_dir, "evalb")
+    evalb_param_path = os.path.join(evalb_dir, "nk.prm")
+    if not os.path.exists(evalb_program_path):
+        evalb_program_path = os.path.join(evalb_dir, "evalb_spmrl")
+        evalb_param_path = os.path.join(evalb_dir, "spmrl.prm")
+    assert os.path.exists(evalb_program_path)
+    assert os.path.exists(evalb_param_path)
+
+    assert len(gold_trees) == len(predicted_trees)
+    for gold_tree, predicted_tree in zip(gold_trees, predicted_trees):
+        assert isinstance(gold_tree, nltk.Tree)
+        assert isinstance(predicted_tree, nltk.Tree)
+        assert list(gold_tree.leaves()) == list(predicted_tree.leaves())
+
+    # mkdtemp instead of TemporaryDirectory: the latter is deleted as soon as the
+    # object is finalized, which removed the files the error message points to.
+    temp_dir = tempfile.mkdtemp(prefix="evalb-")
+    gold_path = os.path.join(temp_dir, "gold.txt")
+    predicted_path = os.path.join(temp_dir, "predicted.txt")
+    output_path = os.path.join(temp_dir, "output.txt")
+
+    with open(gold_path, "w") as outfile:
+        if ref_gold_path is None:
+            for tree in gold_trees:
+                outfile.write("{}\n".format(tree.pformat(margin=1e100)))
+        else:
+            # For the SPMRL dataset our data loader performs some modifications
+            # (like stripping morphological features), so we compare to the
+            # raw gold file to be certain that we haven't spoiled the evaluation
+            # in some way.
+            with open(ref_gold_path) as goldfile:
+                outfile.write(goldfile.read())
+
+    with open(predicted_path, "w") as outfile:
+        for tree in predicted_trees:
+            outfile.write("{}\n".format(tree.pformat(margin=1e100)))
+
+    # Argument list plus explicit stdout redirection instead of a shell string,
+    # so paths with spaces or shell metacharacters reach EVALB unchanged.
+    with open(output_path, "w") as outfile:
+        command = [evalb_program_path, "-p", evalb_param_path, gold_path, predicted_path]
+        try:
+            subprocess.run(command, stdout=outfile)
+        except OSError as error:
+            print("Could not run EVALB: {}".format(error))
+
+    fscore = FScore(math.nan, math.nan, math.nan, math.nan)
+    with open(output_path) as infile:
+        for line in infile:
+            match = re.match(r"Bracketing Recall\s+=\s+(\d+\.\d+)", line)
+            if match:
+                fscore.recall = float(match.group(1))
+            match = re.match(r"Bracketing Precision\s+=\s+(\d+\.\d+)", line)
+            if match:
+                fscore.precision = float(match.group(1))
+            match = re.match(r"Bracketing FMeasure\s+=\s+(\d+\.\d+)", line)
+            if match:
+                fscore.fscore = float(match.group(1))
+            match = re.match(r"Complete match\s+=\s+(\d+\.\d+)", line)
+            if match:
+                fscore.complete_match = float(match.group(1))
+            match = re.match(r"Tagging accuracy\s+=\s+(\d+\.\d+)", line)
+            if match:
+                fscore.tagging_accuracy = float(match.group(1))
+                # Stop at the first tagging-accuracy line; later lines are not read.
+                break
+
+    success = (
+        not math.isnan(fscore.fscore) or fscore.recall == 0.0 or fscore.precision == 0.0
+    )
+
+    if success:
+        shutil.rmtree(temp_dir)
+    else:
+        print("Error reading EVALB results.")
+        print("Gold path: {}".format(gold_path))
+        print("Predicted path: {}".format(predicted_path))
+        print("Output path: {}".format(output_path))
+
+    return fscore
diff --git a/parsing/src/export.py b/parsing/src/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8ebb28106ce291e51882a7911e28301de763310
--- /dev/null
+++ b/parsing/src/export.py
@@ -0,0 +1,307 @@
+import argparse
+import functools
+import itertools
+import os.path
+import time
+
+import torch
+import torch.nn as nn
+
+import numpy as np
+
+import evaluate
+import treebanks
+
+from benepar import Parser, InputSentence
+from benepar.partitioned_transformer import PartitionedMultiHeadAttention
+
+import json
+
+
+def format_elapsed(start_time):
+    """Formats the time elapsed since start_time (a time.time() timestamp)."""
+    # Whole seconds only; the fractional part is truncated by int().
+    whole_minutes, secs = divmod(int(time.time() - start_time), 60)
+    whole_hours, mins = divmod(whole_minutes, 60)
+    day_count, hrs = divmod(whole_hours, 24)
+    hms = "{}h{:02}m{:02}s".format(hrs, mins, secs)
+    # The day prefix appears only once at least 24 hours have passed.
+    return "{}d{}".format(day_count, hms) if day_count > 0 else hms
+
+
+def inputs_from_treebank(treebank, predict_tags):
+    """Builds one InputSentence per treebank example, in treebank order."""
+    sentences = []
+    for example in treebank:
+        kwargs = dict(words=example.words, space_after=example.space_after)
+        # Gold tags are passed along only when the parser should not predict them.
+        kwargs["tags"] = None if predict_tags else [pos for _, pos in example.pos()]
+        kwargs["escaped_words"] = list(example.leaves())
+        sentences.append(InputSentence(**kwargs))
+    return sentences
+
+
+def run_test(args):
+    """Parses the test treebank with a saved model and prints its EVALB scores.
+
+    Trees are also written to args.output_path ("-" means stdout, "" disables).
+    """
+    import contextlib
+    import sys
+
+    print("Loading test trees from {}...".format(args.test_path))
+    test_treebank = treebanks.load_trees(
+        args.test_path, args.test_path_text, args.text_processing
+    )
+    print("Loaded {:,} test examples.".format(len(test_treebank)))
+
+    print("Loading model from {}...".format(args.model_path))
+    parser = Parser(args.model_path, batch_size=args.batch_size)
+
+    print("Parsing test sentences...")
+    start_time = time.time()
+    test_inputs = inputs_from_treebank(test_treebank, predict_tags=args.predict_tags)
+    test_predicted = []
+    with contextlib.ExitStack() as stack:  # closes a file opened below
+        if args.output_path == "-":
+            output_file = sys.stdout
+        elif args.output_path:
+            output_file = stack.enter_context(open(args.output_path, "w"))
+        else:
+            output_file = None
+        for predicted_tree in parser.parse_sents(test_inputs):
+            test_predicted.append(predicted_tree)
+            if output_file is not None:
+                print(predicted_tree.pformat(margin=1e100), file=output_file)
+
+    test_fscore = evaluate.evalb(args.evalb_dir, test_treebank.trees, test_predicted)
+    print(
+        "test-fscore {} test-elapsed {}".format(test_fscore, format_elapsed(start_time))
+    )
+
+
+def get_compressed_state_dict(model):
+    """Quantizes large weights in place to a small codebook; returns the state dict."""
+    state_dict = model.state_dict()
+    for module_name, module in model.named_modules():
+        if not isinstance(
+            module, (nn.Linear, nn.Embedding, PartitionedMultiHeadAttention)
+        ):
+            continue
+        elif "token_type_embeddings" in module_name:
+            continue
+        elif "position_embeddings" in module_name:
+            continue
+        elif "f_tag" in module_name or "f_label" in module_name:
+            continue
+        elif "project_pretrained" in module_name:
+            continue
+        # Attention modules contribute four weight tensors; others only ".weight".
+        if isinstance(module, PartitionedMultiHeadAttention):
+            weight_names = [
+                module_name + "." + param
+                for param in ("w_qkv_c", "w_qkv_p", "w_o_c", "w_o_p")
+            ]
+        else:
+            weight_names = [module_name + ".weight"]
+        for weight_name in weight_names:
+            weight = state_dict[weight_name]
+            if weight.shape.numel() <= 2048:  # small tensors are left untouched
+                continue
+            print(weight_name, ":", weight.shape.numel(), "parameters")
+            if isinstance(module, nn.Embedding) or "word_embeddings" in module_name or "shared.weight" in weight_name:
+                is_embedding = True
+            else:
+                is_embedding = False
+
+            num_steps = 64
+            use_histogram = True
+            if "pooler.dense.weight" in weight_name:
+                weight.data.zero_()  # zeroed outright instead of being quantized
+                continue
+            elif "pretrained_model" in weight_name and not is_embedding:
+                num_steps = 128
+                if not model.retokenizer.is_t5:
+                    use_histogram = False
+            elif isinstance(module, PartitionedMultiHeadAttention):
+                num_steps = 128
+
+            if use_histogram:
+                observer = torch.quantization.HistogramObserver()
+                observer.dst_nbins = num_steps
+                observer(weight)
+                scale, zero_point = observer.calculate_qparams()
+                scale = scale.item()
+                zero_point = zero_point.item()
+                cluster_centers = (
+                    scale * (np.arange(0, 256, 256 / num_steps) - zero_point)[:, None]
+                )
+                cluster_centers = np.asarray(cluster_centers, dtype=np.float32)
+            else:
+                weight_np = weight.cpu().detach().numpy()
+                min_val = weight_np.min()
+                max_val = weight_np.max()
+                bucket_width = (max_val - min_val) / num_steps
+                cluster_centers = (
+                    min_val
+                    + (np.arange(num_steps, dtype=np.float32) + 0.5) * bucket_width
+                )
+                cluster_centers = cluster_centers.reshape((-1, 1))
+            # Replace every weight by its nearest codebook value (squared distance).
+            codebook = torch.tensor(
+                cluster_centers, dtype=weight.dtype, device=weight.device
+            )
+            distances = weight.data.reshape((-1, 1)) - codebook.t()
+            codes = torch.argmin(distances ** 2, dim=-1)
+            weight_rounded = codebook[codes].reshape(weight.shape)
+            weight.data.copy_(weight_rounded)
+
+    return state_dict
+
+
+def run_export(args):
+    """Exports a trained parser to args.output_dir and re-checks its accuracy.
+
+    Writes the tokenizer, the config and "benepar_model.bin" (compressed when
+    args.compress is set). If args.test_path is given, the model is scored
+    before and after the export and the score differences are printed.
+    """
+    if args.test_path is not None:
+        print("Loading test trees from {}...".format(args.test_path))
+        test_treebank = treebanks.load_trees(
+            args.test_path, args.test_path_text, args.text_processing
+        )
+        print("Loaded {:,} test examples.".format(len(test_treebank)))
+    else:
+        test_treebank = None
+
+    print("Loading model from {}...".format(args.model_path))
+    parser = Parser(args.model_path, batch_size=args.batch_size)
+    model = parser._parser
+    if model.pretrained_model is None:
+        raise ValueError(
+            "Exporting is only defined when using a pre-trained transformer "
+            "encoder. For CharLSTM-based model, just distribute the pytorch "
+            "checkpoint directly. You may manually delete the 'optimizer' "
+            "field to reduce file size by discarding the optimizer state."
+        )
+
+    if test_treebank is not None:
+        print("Parsing test sentences (predicting tags)...")
+        start_time = time.time()
+        test_inputs = inputs_from_treebank(test_treebank, predict_tags=True)
+        test_predicted = list(parser.parse_sents(test_inputs))
+        test_fscore = evaluate.evalb(args.evalb_dir, test_treebank.trees, test_predicted)
+        test_elapsed = format_elapsed(start_time)
+        print("test-fscore {} test-elapsed {}".format(test_fscore, test_elapsed))
+
+        print("Parsing test sentences (not predicting tags)...")
+        start_time = time.time()
+        test_inputs = inputs_from_treebank(test_treebank, predict_tags=False)
+        notags_test_predicted = list(parser.parse_sents(test_inputs))
+        notags_test_fscore = evaluate.evalb(
+            args.evalb_dir, test_treebank.trees, notags_test_predicted
+        )
+        notags_test_elapsed = format_elapsed(start_time)
+        print(
+            "test-fscore {} test-elapsed {}".format(notags_test_fscore, notags_test_elapsed)
+        )
+
+    print("Exporting tokenizer...")
+    model.retokenizer.tokenizer.save_pretrained(args.output_dir)
+    print("Exporting config...")
+    config = model.pretrained_model.config
+    config.benepar = model.config
+    config.save_pretrained(args.output_dir)
+    if args.compress:
+        print("Compressing weights...")
+        state_dict = get_compressed_state_dict(model.cpu())
+        print("Saving weights...")
+    else:
+        print("Exporting weights...")
+        state_dict = model.cpu().state_dict()
+    torch.save(state_dict, os.path.join(args.output_dir, "benepar_model.bin"))
+    del model, parser, state_dict
+    print("Loading exported model from {}...".format(args.output_dir))
+    exported_parser = Parser(args.output_dir, batch_size=args.batch_size)
+
+    if test_treebank is None:
+        print()
+        print("Export complete.")
+        print("Did not verify model accuracy because no treebank was provided.")
+        return
+
+    print("Parsing test sentences (predicting tags)...")
+    start_time = time.time()
+    test_inputs = inputs_from_treebank(test_treebank, predict_tags=True)
+    exported_predicted = list(exported_parser.parse_sents(test_inputs))
+    exported_fscore = evaluate.evalb(
+        args.evalb_dir, test_treebank.trees, exported_predicted
+    )
+    exported_elapsed = format_elapsed(start_time)
+    print(
+        "exported-fscore {} exported-elapsed {}".format(
+            exported_fscore, exported_elapsed
+        )
+    )
+    print("Parsing test sentences (not predicting tags)...")
+    start_time = time.time()
+    test_inputs = inputs_from_treebank(test_treebank, predict_tags=False)
+    notags_exported_predicted = list(exported_parser.parse_sents(test_inputs))
+    notags_exported_fscore = evaluate.evalb(
+        args.evalb_dir, test_treebank.trees, notags_exported_predicted
+    )
+    notags_exported_elapsed = format_elapsed(start_time)
+    print(
+        "exported-fscore {} exported-elapsed {}".format(
+            notags_exported_fscore, notags_exported_elapsed
+        )
+    )
+    print()
+    print("Export and verification complete.")
+    # Parsing deltas come from the runs without tag prediction; the tagging
+    # accuracy delta comes from the runs with tag prediction.
+    fscore_delta = evaluate.FScore(
+        recall=notags_exported_fscore.recall - notags_test_fscore.recall,
+        precision=notags_exported_fscore.precision - notags_test_fscore.precision,
+        fscore=notags_exported_fscore.fscore - notags_test_fscore.fscore,
+        complete_match=(
+            notags_exported_fscore.complete_match - notags_test_fscore.complete_match
+        ),
+        tagging_accuracy=(
+            exported_fscore.tagging_accuracy - test_fscore.tagging_accuracy
+        ),
+    )
+    print("delta-fscore {}".format(fscore_delta))
+
+
+def main():
+    """Parses the command line and runs the selected subcommand (test or export)."""
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers()
+    subparser = subparsers.add_parser("test")
+    subparser.set_defaults(callback=run_test)
+    subparser.add_argument("--model-path", type=str, required=True)
+    subparser.add_argument("--evalb-dir", default="EVALB/")
+    subparser.add_argument("--test-path", type=str, required=True)
+    subparser.add_argument("--test-path-text", type=str)
+    subparser.add_argument("--text-processing", default="default")
+    subparser.add_argument("--predict-tags", action="store_true")
+    subparser.add_argument("--output-path", default="")
+    subparser.add_argument("--batch-size", type=int, default=8)
+    subparser = subparsers.add_parser("export")
+    subparser.set_defaults(callback=run_export)
+    subparser.add_argument("--model-path", type=str, required=True)
+    subparser.add_argument("--output-dir", type=str, required=True)
+    subparser.add_argument("--evalb-dir", default="EVALB/")
+    subparser.add_argument("--test-path", type=str, default=None)
+    subparser.add_argument("--test-path-text", type=str)
+    subparser.add_argument("--text-processing", default="default")
+    subparser.add_argument("--compress", action="store_true")
+    subparser.add_argument("--batch-size", type=int, default=8)
+    args = parser.parse_args()
+    if not hasattr(args, "callback"):  # no subcommand given: usage error, exit 2
+        parser.error("a subcommand is required: test or export")
+    args.callback(args)
+
+
+if __name__ == "__main__":  # only when run as a script, not on import
+    main()
diff --git a/parsing/src/learning_rates.py b/parsing/src/learning_rates.py
new file mode 100644
index 0000000000000000000000000000000000000000..94aa5eb330fe70215ddc8da8979a21588466e376
--- /dev/null
+++ b/parsing/src/learning_rates.py
@@ -0,0 +1,33 @@
+from torch.optim.lr_scheduler import ReduceLROnPlateau
+
+
+class WarmupThenReduceLROnPlateau(ReduceLROnPlateau):
+    """ReduceLROnPlateau that first ramps the learning rate up linearly from 0."""
+    def __init__(self, optimizer, warmup_steps, *args, **kwargs):
+        """
+        Args:
+            optimizer (Optimizer): Optimizer to wrap
+            warmup_steps: steps before reaching base learning rate (0: no warmup)
+            *args, **kwargs: Arguments for ReduceLROnPlateau
+        """
+        super().__init__(optimizer, *args, **kwargs)
+        self.warmup_steps = warmup_steps
+        self.steps_taken = 0
+        self.base_lrs = [group["lr"] for group in optimizer.param_groups]
+        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
+            param_group["lr"] = lr
+
+    def get_lr(self):
+        """Returns the warmup learning rate of each param group for this step."""
+        assert self.steps_taken <= self.warmup_steps
+        # warmup_steps == 0 means "start at the base rate" (and avoids 0 / 0).
+        scale = self.steps_taken / self.warmup_steps if self.warmup_steps else 1.0
+        return [base_lr * scale for base_lr in self.base_lrs]
+
+    def step(self, metrics=None):
+        """Advances one step; metrics are forwarded only once the warmup is over."""
+        self.steps_taken += 1
+        if self.steps_taken <= self.warmup_steps:
+            for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
+                param_group["lr"] = lr
+        elif metrics is not None:
+            super().step(metrics)
diff --git a/parsing/src/main.py b/parsing/src/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..a141d34d637368d6fe0732357fc567ee580f3024
--- /dev/null
+++ b/parsing/src/main.py
@@ -0,0 +1,414 @@
+import argparse
+import functools
+import itertools
+import os.path
+import time
+
+import torch
+
+import numpy as np
+
+from benepar import char_lstm
+from benepar import decode_chart
+from benepar import nkutil
+from benepar import parse_chart
+import evaluate
+import learning_rates
+import treebanks
+
+def format_elapsed(start_time):
+    """Renders the time since start_time (a time.time() value) as [Dd]HhMMmSSs."""
+    seconds_total = int(time.time() - start_time)  # fractions are truncated
+    minutes_total, secs = divmod(seconds_total, 60)
+    hours_total, mins = divmod(minutes_total, 60)
+    day_count, hrs = divmod(hours_total, 24)
+    text = "{}h{:02}m{:02}s".format(hrs, mins, secs)
+    # Days are prefixed only after at least 24 hours.
+    return text if day_count <= 0 else "{}d{}".format(day_count, text)
+
+
+def make_hparams():
+    """Returns an nkutil.HParams populated with the default hyperparameters."""
+    defaults = dict(
+        # Data processing
+        max_len_train=0, max_len_dev=0,  # no length limit
+        # Optimization
+        batch_size=32,
+        learning_rate=0.00005,
+        learning_rate_warmup_steps=160,
+        clip_grad_norm=0.0,  # no clipping
+        checks_per_epoch=4,
+        step_decay_factor=0.5,
+        step_decay_patience=5,
+        max_consecutive_decays=3,  # establishes a termination criterion
+        # CharLSTM
+        use_chars_lstm=False,
+        d_char_emb=64,
+        char_lstm_input_dropout=0.2,
+        # BERT and other pre-trained models
+        use_pretrained=False,
+        pretrained_model="bert-base-uncased",
+        # Partitioned transformer encoder
+        use_encoder=False,
+        d_model=1024,
+        num_layers=8,
+        num_heads=8,
+        d_kv=64,
+        d_ff=2048,
+        encoder_max_len=512,
+        # Dropout
+        morpho_emb_dropout=0.2,
+        attention_dropout=0.2,
+        relu_dropout=0.1,
+        residual_dropout=0.2,
+        # Output heads and losses
+        force_root_constituent="auto",
+        predict_tags=False,
+        d_label_hidden=256, d_tag_hidden=256,
+        tag_loss_scale=5.0,
+    )
+    return nkutil.HParams(**defaults)
+
+
+def run_train(args, hparams):
+    """Trains a chart parser, saving a checkpoint whenever the dev F-score improves.
+
+    Metrics are logged to wandb. Training ends when the number of examples
+    processed since the best dev F-score exceeds (step_decay_patience + 1) *
+    max_consecutive_decays * len(train_treebank), checked after each epoch.
+    """
+    import wandb
+    wandb.init(project='german-delex-parser')
+    if args.numpy_seed is not None:
+        print("Setting numpy random seed to {}...".format(args.numpy_seed))
+        np.random.seed(args.numpy_seed)
+
+    # Make sure that pytorch is actually being initialized randomly.
+    # On my cluster I was getting highly correlated results from multiple
+    # runs, but calling reset_parameters() changed that. A brief look at the
+    # pytorch source code revealed that pytorch initializes its RNG by
+    # calling std::random_device, which according to the C++ spec is allowed
+    # to be deterministic.
+    seed_from_numpy = np.random.randint(2147483648)
+    print("Manual seed for pytorch:", seed_from_numpy)
+    torch.manual_seed(seed_from_numpy)
+
+    hparams.set_from_args(args)
+    print("Hyperparameters:")
+    hparams.print()
+
+    print("Loading training trees from {}...".format(args.train_path))
+    train_treebank = treebanks.load_trees(
+        args.train_path, args.train_path_text, args.text_processing
+    )
+    if hparams.max_len_train > 0:
+        train_treebank = train_treebank.filter_by_length(hparams.max_len_train)
+    print("Loaded {:,} training examples.".format(len(train_treebank)))
+
+    print("Loading development trees from {}...".format(args.dev_path))
+    dev_treebank = treebanks.load_trees(
+        args.dev_path, args.dev_path_text, args.text_processing
+    )
+    if hparams.max_len_dev > 0:
+        dev_treebank = dev_treebank.filter_by_length(hparams.max_len_dev)
+    print("Loaded {:,} development examples.".format(len(dev_treebank)))
+
+    print("Constructing vocabularies...")
+    label_vocab = decode_chart.ChartDecoder.build_vocab(train_treebank.trees)
+    if hparams.use_chars_lstm:
+        char_vocab = char_lstm.RetokenizerForCharLSTM.build_vocab(train_treebank.sents)
+    else:
+        char_vocab = None
+    tag_vocab = set()
+    for tree in train_treebank.trees:
+        for _, tag in tree.pos():
+            tag_vocab.add(tag)
+    tag_vocab = ["UNK"] + sorted(tag_vocab)  # "UNK" gets index 0
+    tag_vocab = {label: i for i, label in enumerate(tag_vocab)}
+
+    if hparams.force_root_constituent.lower() in ("true", "yes", "1"):
+        hparams.force_root_constituent = True
+    elif hparams.force_root_constituent.lower() in ("false", "no", "0"):
+        hparams.force_root_constituent = False
+    elif hparams.force_root_constituent.lower() == "auto":
+        hparams.force_root_constituent = (
+            decode_chart.ChartDecoder.infer_force_root_constituent(train_treebank.trees)
+        )
+        print("Set hparams.force_root_constituent to", hparams.force_root_constituent)
+
+    print("Initializing model...")
+    parser = parse_chart.ChartParser(
+        tag_vocab=tag_vocab,
+        label_vocab=label_vocab,
+        char_vocab=char_vocab,
+        hparams=hparams,
+    )
+    if args.parallelize:
+        parser.parallelize()
+    elif torch.cuda.is_available():
+        parser.cuda()
+    else:
+        print("Not using CUDA!")
+
+    print("Initializing optimizer...")
+    trainable_parameters = [
+        param for param in parser.parameters() if param.requires_grad
+    ]
+    optimizer = torch.optim.Adam(
+        trainable_parameters, lr=hparams.learning_rate, betas=(0.9, 0.98), eps=1e-9
+    )
+
+    scheduler = learning_rates.WarmupThenReduceLROnPlateau(
+        optimizer,
+        hparams.learning_rate_warmup_steps,
+        mode="max",
+        factor=hparams.step_decay_factor,
+        patience=hparams.step_decay_patience * hparams.checks_per_epoch,
+        verbose=True,
+    )
+    clippable_parameters = trainable_parameters
+    grad_clip_threshold = (
+        np.inf if hparams.clip_grad_norm == 0 else hparams.clip_grad_norm
+    )
+
+    print("Training...")
+    total_processed = 0
+    current_processed = 0
+    check_every = len(train_treebank) / hparams.checks_per_epoch  # in examples
+    best_dev_fscore = -np.inf
+    best_dev_model_path = None
+    best_dev_processed = 0
+
+    start_time = time.time()
+    def check_dev():
+        """Scores the dev set and saves a checkpoint on a new best F-score."""
+        nonlocal best_dev_fscore
+        nonlocal best_dev_model_path
+        nonlocal best_dev_processed
+        dev_start_time = time.time()
+
+        dev_predicted = parser.parse(
+            dev_treebank.without_gold_annotations(),
+            subbatch_max_tokens=args.subbatch_max_tokens,
+        )
+        dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank.trees, dev_predicted)
+        wandb.log(
+            {"dev-fscore": dev_fscore.fscore,
+             "dev-recall": dev_fscore.recall,
+             "dev-precision": dev_fscore.precision,
+             "dev-completematch": dev_fscore.complete_match
+             }
+        )
+
+        print(
+            "dev-fscore {} "
+            "dev-elapsed {} "
+            "total-elapsed {}".format(
+                dev_fscore,
+                format_elapsed(dev_start_time),
+                format_elapsed(start_time),
+            )
+        )
+
+        if dev_fscore.fscore > best_dev_fscore:
+            if best_dev_model_path is not None:
+                extensions = [".pt"]
+                for ext in extensions:
+                    path = best_dev_model_path + ext
+                    if os.path.exists(path):
+                        print("Removing previous model file {}...".format(path))
+                        os.remove(path)
+
+            best_dev_fscore = dev_fscore.fscore
+            best_dev_model_path = "{}_dev={:.2f}".format(
+                args.model_path_base, dev_fscore.fscore
+            )
+            best_dev_processed = total_processed
+            print("Saving new best model to {}...".format(best_dev_model_path))
+            torch.save(
+                {
+                    "config": parser.config,
+                    "state_dict": parser.state_dict(),
+                    "optimizer": optimizer.state_dict(),
+                },
+                best_dev_model_path + ".pt",
+            )
+
+    data_loader = torch.utils.data.DataLoader(
+        train_treebank,
+        batch_size=hparams.batch_size,
+        shuffle=True,
+        collate_fn=functools.partial(
+            parser.encode_and_collate_subbatches,
+            subbatch_max_tokens=args.subbatch_max_tokens,
+        ),
+    )
+    train_step = 0
+    for epoch in itertools.count(start=1):
+        epoch_start_time = time.time()
+
+        for batch_num, batch in enumerate(data_loader, start=1):
+            optimizer.zero_grad()
+            parser.train()
+
+            batch_loss_value = 0.0
+            for subbatch_size, subbatch in batch:
+                loss = parser.compute_loss(subbatch)
+                loss_value = float(loss.data.cpu().numpy())
+                batch_loss_value += loss_value
+                if loss_value > 0:
+                    loss.backward()
+                del loss
+                total_processed += subbatch_size
+                current_processed += subbatch_size
+
+            grad_norm = torch.nn.utils.clip_grad_norm_(
+                clippable_parameters, grad_clip_threshold
+            )
+            optimizer.step()
+            train_step += 1
+            wandb.log(
+                {'batch-loss': batch_loss_value,}
+            )
+
+            if train_step % 100 == 0:
+                print(
+                    "epoch {:,} "
+                    "batch {:,}/{:,} "
+                    "processed {:,} "
+                    "batch-loss {:.4f} "
+                    "grad-norm {:.4f} "
+                    "epoch-elapsed {} "
+                    "total-elapsed {}".format(
+                        epoch,
+                        batch_num,
+                        int(np.ceil(len(train_treebank) / hparams.batch_size)),
+                        total_processed,
+                        batch_loss_value,
+                        grad_norm,
+                        format_elapsed(epoch_start_time),
+                        format_elapsed(start_time),
+                    )
+                )
+
+            if current_processed >= check_every:
+                current_processed -= check_every
+                check_dev()
+                # NOTE: passes the best score so far, not the latest dev score.
+                scheduler.step(metrics=best_dev_fscore)
+            else:
+                scheduler.step()
+        if (total_processed - best_dev_processed) > (
+            (hparams.step_decay_patience + 1)
+            * hparams.max_consecutive_decays
+            * len(train_treebank)
+        ):
+            print("Terminating due to lack of improvement in dev fscore.")
+            break
+
+
+def run_test(args):
+    print("Loading test trees from {}...".format(args.test_path))
+    test_treebank = treebanks.load_trees(
+        args.test_path, args.test_path_text, args.text_processing
+    )
+    print("Loaded {:,} test examples.".format(len(test_treebank)))
+
+    if len(args.model_path) != 1:
+        raise NotImplementedError(
+            "Ensembling multiple parsers is not "
+            "implemented in this version of the code."
+        )
+
+    model_path = args.model_path[0]
+    print("Loading model from {}...".format(model_path))
+    parser = parse_chart.ChartParser.from_trained(model_path)
+    if args.no_predict_tags and parser.f_tag is not None:
+        print("Removing part-of-speech tagging head...")
+        parser.f_tag = None
+    if args.parallelize:
+        parser.parallelize()
+    elif torch.cuda.is_available():
+        parser.cuda()
+
+    print("Parsing test sentences...")
+    start_time = time.time()
+
+    test_predicted = parser.parse(
+        test_treebank.without_gold_annotations(),
+        subbatch_max_tokens=args.subbatch_max_tokens,
+    )
+
+    if args.output_path == "-":
+        for tree in test_predicted:
+            print(tree.pformat(margin=1e100))
+    elif args.output_path:
+        with open(args.output_path, "w") as outfile:
+            for tree in test_predicted:
+                outfile.write("{}\n".format(tree.pformat(margin=1e100)))
+
+    # The tree loader does some preprocessing to the trees (e.g. stripping TOP
+    # symbols or SPMRL morphological features). We compare with the input file
+    # directly to be extra careful about not corrupting the evaluation. We also
+    # allow specifying a separate "raw" file for the gold trees: the inputs to
+    # our parser have traces removed and may have predicted tags substituted,
+    # and we may wish to compare against the raw gold trees to make sure we
+    # haven't made a mistake. As far as we can tell all of these variations give
+    # equivalent results.
+    ref_gold_path = args.test_path
+    if args.test_path_raw is not None:
+        print("Comparing with raw trees from", args.test_path_raw)
+        ref_gold_path = args.test_path_raw
+
+    test_fscore = evaluate.evalb(
+        args.evalb_dir, test_treebank.trees, test_predicted, ref_gold_path=ref_gold_path
+    )
+
+    print(
+        "test-fscore {} "
+        "test-elapsed {}".format(
+            test_fscore,
+            format_elapsed(start_time),
+        )
+    )
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers()
+
+    hparams = make_hparams()
+    subparser = subparsers.add_parser("train")
+    subparser.set_defaults(callback=lambda args: run_train(args, hparams))
+    hparams.populate_arguments(subparser)
+    subparser.add_argument("--numpy-seed", type=int)
+    subparser.add_argument("--model-path-base", required=True)
+    subparser.add_argument("--evalb-dir", default="EVALB/")
+    subparser.add_argument("--train-path", default="data/wsj/train_02-21.LDC99T42")
+    subparser.add_argument("--train-path-text", type=str)
+    subparser.add_argument("--dev-path", default="data/wsj/dev_22.LDC99T42")
+    subparser.add_argument("--dev-path-text", type=str)
+    subparser.add_argument("--text-processing", default="default")
+    subparser.add_argument("--subbatch-max-tokens", type=int, default=2000)
+    subparser.add_argument("--parallelize", action="store_true")
+    subparser.add_argument("--print-vocabs", action="store_true")
+
+    subparser = subparsers.add_parser("test")
+    subparser.set_defaults(callback=run_test)
+    subparser.add_argument("--model-path", nargs="+", required=True)
+    subparser.add_argument("--evalb-dir", default="EVALB/")
+    subparser.add_argument("--test-path", default="data/wsj/test_23.LDC99T42")
+    subparser.add_argument("--test-path-text", type=str)
+    subparser.add_argument("--test-path-raw", type=str)
+    subparser.add_argument("--text-processing", default="default")
+    subparser.add_argument("--subbatch-max-tokens", type=int, default=500)
+    subparser.add_argument("--parallelize", action="store_true")
+    subparser.add_argument("--output-path", default="")
+    subparser.add_argument("--no-predict-tags", action="store_true")
+
+    args = parser.parse_args()
+    args.callback(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/parsing/src/parse.py b/parsing/src/parse.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c353abd6f56bc1494f0b77bf9d254fe526cb058
--- /dev/null
+++ b/parsing/src/parse.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python3
+
+import argparse
+import pickle
+import torch
+
+from .treebanks import load_single_text
+from .benepar import parse_chart
+from huggingface_hub import hf_hub_download
+
+
+def run_parse(words, tags, model_path='nielklug/mhg_parser', subbatch_max_tokens=500):
+    # print("Loading test trees from {}...".format(args.text_path))
+    test_treebank = load_single_text(words, tags)
+    # print("Loaded {:,} test examples.".format(len(test_treebank)))
+    model_file = hf_hub_download(repo_id=model_path, filename='german-delex-parser_dev=83.10.pt')
+    # print("Loading model from {}...".format(model_path))
+    parser = parse_chart.ChartParser.from_trained(model_file)
+
+    if torch.cuda.is_available():
+        parser.cuda()
+    
+    # print("Parsing test sentences...")
+
+    test_predicted = parser.parse(
+        test_treebank.without_gold_annotations(),
+        subbatch_max_tokens=subbatch_max_tokens,
+    )
+
+    # insert original tokens to the delexicalized parses
+    for example, prediction in zip(test_treebank, test_predicted):
+        leaf_positions = prediction.treepositions('leaves')
+        for word_tag_pair, leaf_pos in zip(example.word_tag_pairs, leaf_positions):
+            prediction[leaf_pos] = word_tag_pair[0]
+            prediction[leaf_pos[:-1]].set_label(word_tag_pair[1])
+
+    results = []
+    for tree in test_predicted:
+        results.append(tree.pformat(margin=1e100))
+    return results
+
+    # with open(args.output_path, "w") as outfile:
+    #     for tree in test_predicted:
+    #         outfile.write("{}\n".format(tree.pformat(margin=1e100)))
+
+
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--model_path", default="/mounts/work/nie/mhg/schmid/MHG-Parser/self-attentive-parser-master/models/german-delex-parser_dev=83.10.pt",
+    type=str, help='path to the trained parser')
+    parser.add_argument("--text_path", required=True, type=str)
+    parser.add_argument('--subbatch_max_tokens', default=500, type=str)
+    parser.add_argument('--output_path', default="", type=str)
+
+    args = parser.parse_args()
+
+    run_parse(args)
+
+if __name__ == "__main__":
+    main()
+
diff --git a/parsing/src/test.py b/parsing/src/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c55d0e2e3fb61985b96fd1514de6f236360e90ce
--- /dev/null
+++ b/parsing/src/test.py
@@ -0,0 +1,78 @@
+import argparse
+from benepar import nkutil
+
+def make_hparams():
+    return nkutil.HParams(
+        # Data processing
+        max_len_train=0,  # no length limit
+        max_len_dev=0,  # no length limit
+        # Optimization
+        batch_size=32,
+        learning_rate=0.00005,
+        learning_rate_warmup_steps=160,
+        clip_grad_norm=0.0,  # no clipping
+        checks_per_epoch=4,
+        step_decay_factor=0.5,
+        step_decay_patience=5,
+        max_consecutive_decays=3,  # establishes a termination criterion
+        # CharLSTM
+        use_chars_lstm=False,
+        d_char_emb=64,
+        char_lstm_input_dropout=0.2,
+        # BERT and other pre-trained models
+        use_pretrained=False,
+        pretrained_model="bert-base-uncased",
+        # Partitioned transformer encoder
+        use_encoder=False,
+        d_model=1024,
+        num_layers=8,
+        num_heads=8,
+        d_kv=64,
+        d_ff=2048,
+        encoder_max_len=512,
+        # Dropout
+        morpho_emb_dropout=0.2,
+        attention_dropout=0.2,
+        relu_dropout=0.1,
+        residual_dropout=0.2,
+        # Output heads and losses
+        force_root_constituent="auto",
+        predict_tags=False,
+        d_label_hidden=256,
+        d_tag_hidden=256,
+        tag_loss_scale=5.0,
+    )
+
+def run_train(args, hparams):
+    print("Train:")
+    print("dimension of attention key value: {}".format(hparams.d_kv))
+    print("use pretrained: {}".format(args.use_pretrained))
+    print("text processing mode: {}".format(args.text_processing))
+
+def run_test(args):
+    print("Test:")
+    print("text processing mode: {}".format(args.text_processing))
+
+def main():
+    print("running...")
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers()
+
+    hparams = make_hparams()
+    subparser = subparsers.add_parser('train')
+    subparser.set_defaults(callback=lambda args: run_train(args, hparams))
+
+    hparams.populate_arguments(subparser)
+    subparser.add_argument("--text-processing", default='default')
+
+    subparser = subparsers.add_parser("test")
+    subparser.set_defaults(callback=run_test)
+    subparser.add_argument("--test", required=True)
+    subparser.add_argument("--text-processing", default='default')
+
+    args = parser.parse_args()
+    print(args.__dict__)
+    args.callback(args)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/parsing/src/transliterate.py b/parsing/src/transliterate.py
new file mode 100644
index 0000000000000000000000000000000000000000..142d078f09743d3329958662f944b3adda903857
--- /dev/null
+++ b/parsing/src/transliterate.py
@@ -0,0 +1,161 @@
+# Buckwalter transliteration table: ASCII symbol -> Unicode character (all
+# targets lie in U+0621..U+0671). arabic() looks up every character of a token
+# here and leaves characters without an entry unchanged.
+# Three targets have two source symbols each ('>'/'O', '&'/'W', '<'/'I') --
+# presumably alternate spellings of the same letter; TODO confirm.
+BUCKWALTER_MAP = {
+    '\'': '\u0621',
+    '|': '\u0622',
+    '>': '\u0623',
+    'O': '\u0623',
+    '&': '\u0624',
+    'W': '\u0624',
+    '<': '\u0625',
+    'I': '\u0625',
+    '}': '\u0626',
+    'A': '\u0627',
+    'b': '\u0628',
+    'p': '\u0629',
+    't': '\u062A',
+    'v': '\u062B',
+    'j': '\u062C',
+    'H': '\u062D',
+    'x': '\u062E',
+    'd': '\u062F',
+    '*': '\u0630',
+    'r': '\u0631',
+    'z': '\u0632',
+    's': '\u0633',
+    '$': '\u0634',
+    'S': '\u0635',
+    'D': '\u0636',
+    'T': '\u0637',
+    'Z': '\u0638',
+    'E': '\u0639',
+    'g': '\u063A',
+    '_': '\u0640',
+    'f': '\u0641',
+    'q': '\u0642',
+    'k': '\u0643',
+    'l': '\u0644',
+    'm': '\u0645',
+    'n': '\u0646',
+    'h': '\u0647',
+    'w': '\u0648',
+    'Y': '\u0649',
+    'y': '\u064A',
+    'F': '\u064B',
+    'N': '\u064C',
+    'K': '\u064D',
+    'a': '\u064E',
+    'u': '\u064F',
+    'i': '\u0650',
+    '~': '\u0651',
+    'o': '\u0652',
+    '`': '\u0670',
+    '{': '\u0671',
+}
+
+# Whole-token escape sequences and the literal text they stand for. arabic()
+# substitutes a token that exactly matches a key before mapping characters.
+BUCKWALTER_UNESCAPE = {
+    "-LRB-": "(",
+    "-RRB-": ")",
+    "-LCB-": "{",
+    "-RCB-": "}",
+    "-LSB-": "[",
+    "-RSB-": "]",
+    '-PLUS-': "+",
+    '-MINUS-': "-",
+}
+
+# Set of punctuation characters; none of them is a key of BUCKWALTER_MAP.
+# NOTE(review): not referenced by arabic() or hebrew() in this module --
+# presumably kept for callers elsewhere; confirm before removing.
+BUCKWALTER_UNCHANGED = set('.?!,"%-/:;=')
+
+# Hebrew transliteration table: ASCII symbol -> output character. Letters map
+# to U+05D0..U+05EA, digits and '.'/',' map to themselves, and 'U'/'O' map to
+# '"' and '%'. hebrew() looks up every character of a token here and leaves
+# characters without an entry unchanged.
+HEBREW_MAP = {
+    'A': '\u05d0',
+    'B': '\u05d1',
+    'G': '\u05d2',
+    'D': '\u05d3',
+    'H': '\u05d4',
+    'W': '\u05d5',
+    'Z': '\u05d6',
+    'X': '\u05d7',
+    'J': '\u05d8',
+    'I': '\u05d9',
+    'K': '\u05db',
+    'L': '\u05dc',
+    'M': '\u05de',
+    'N': '\u05e0',
+    'S': '\u05e1',
+    'E': '\u05e2',
+    'P': '\u05e4',
+    'C': '\u05e6',
+    'Q': '\u05e7',
+    'R': '\u05e8',
+    'F': '\u05e9',
+    'T': '\u05ea',
+    '0': '0',
+    '1': '1',
+    '2': '2',
+    '3': '3',
+    '4': '4',
+    '5': '5',
+    '6': '6',
+    '7': '7',
+    '8': '8',
+    '9': '9',
+    'U': '"',
+    'O': '%',
+    '.': '.',
+    ',': ',',
+}
+
+# Regular letter -> word-final form (kaf, mem, nun, pe, tsadi). hebrew()
+# applies this to the last character of its output only.
+HEBREW_SUFFIX_MAP = {
+    '\u05db': '\u05da',
+    '\u05de': '\u05dd',
+    '\u05e0': '\u05df',
+    '\u05e4': '\u05e3',
+    '\u05e6': '\u05e5',
+}
+
+# Whole-token punctuation names and the literal text they stand for. hebrew()
+# substitutes a token that exactly matches a key before mapping characters.
+HEBREW_UNESCAPE = {
+    "yyCLN": ":",
+    "yyCM": ",",
+    "yyDASH": "-",
+    "yyDOT": ".",
+    "yyELPS": "...",
+    "yyEXCL": "!",
+    "yyLRB": "(",
+    "yyQM": "?",
+    "yyRRB": ")",
+    "yySCLN": ";",
+}
+
+
+
+def arabic(inp):
+    """
+    Undo Buckwalter transliteration
+
+    See: http://languagelog.ldc.upenn.edu/myl/ldc/morph/buckwalter.html
+
+    This code inspired by:
+    https://github.com/dlwh/epic/blob/master/src/main/scala/epic/util/ArabicNormalization.scala
+    """
+    return "".join(
+        BUCKWALTER_MAP.get(char, char)
+        for char in BUCKWALTER_UNESCAPE.get(inp, inp))
+
+def hebrew(inp):
+    """
+    Undo Hebrew transliteration
+
+    See: http://www.phil.uu.nl/ozsl/articles/simaan02.pdf
+
+    This code inspired by:
+    https://github.com/habeanf/yap/blob/b57502364b73ef78f3510eb890319ae268eeacca/nlp/parser/xliter8/types.go
+    """
+    out = "".join(
+        HEBREW_MAP.get(char, char)
+        for char in HEBREW_UNESCAPE.get(inp, inp))
+    if out and (out[-1] in HEBREW_SUFFIX_MAP):
+        out = out[:-1] + HEBREW_SUFFIX_MAP[out[-1]]
+    return out
+
+# Registry of the per-token conversion functions defined above, keyed by
+# language name.
+TRANSLITERATIONS = {
+    'arabic': arabic,
+    'hebrew': hebrew,
+}
diff --git a/parsing/src/treebanks.py b/parsing/src/treebanks.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f5cf4191448756b419df0899c6b142dd1802761
--- /dev/null
+++ b/parsing/src/treebanks.py
@@ -0,0 +1,323 @@
+import dataclasses
+from typing import List, Optional, Tuple
+
+import nltk
+from nltk.corpus.reader.bracket_parse import BracketParseCorpusReader
+import torch
+
+from .benepar import ptb_unescape
+from .benepar.parse_base import BaseInputExample
+from .transliterate import TRANSLITERATIONS
+
+
+
+@dataclasses.dataclass
+class ParsingExample(BaseInputExample):
+    """A single parse tree and sentence."""
+
+    words: List[str]
+    space_after: List[bool]
+    tree: Optional[nltk.Tree] = None
+    _pos: Optional[List[Tuple[str, str]]] = None
+    word_tag_pairs: Optional[List[Tuple[str, str]]] = None
+
+    def leaves(self):
+        if self.tree is not None:
+            return self.tree.leaves()
+        elif self._pos is not None:
+            return [word for word, tag in self._pos]
+        else:
+            return None
+
+    def pos(self):
+        if self.tree is not None:
+            return self.tree.pos()
+        else:
+            return self._pos
+
+    def without_gold_annotations(self):
+        return dataclasses.replace(self, tree=None, _pos=self.pos())
+
+
+class Treebank(torch.utils.data.Dataset):
+    def __init__(self, examples):
+        self.examples = examples
+
+    def __len__(self):
+        return len(self.examples)
+
+    def __getitem__(self, index):
+        return self.examples[index]
+
+    @property
+    def trees(self):
+        return [x.tree for x in self.examples]
+
+    @property
+    def sents(self):
+        return [x.words for x in self.examples]
+
+    @property
+    def tagged_sents(self):
+        return [x.pos() for x in self.examples]
+
+    def filter_by_length(self, max_len):
+        return Treebank([x for x in self.examples if len(x.leaves()) <= max_len])
+
+    def without_gold_annotations(self):
+        return Treebank([x.without_gold_annotations() for x in self.examples])
+
+
+def read_text(text_path):
+    sents = []
+    sent = []
+    end_of_multiword = 0
+    multiword_combined = ""
+    multiword_separate = []
+    multiword_sp_after = False
+    with open(text_path) as f:
+        for line in f:
+            if not line.strip() or line.startswith("#"):
+                if sent:
+                    sents.append(([w for w, sp in sent], [sp for w, sp in sent]))
+                    sent = []
+                    assert end_of_multiword == 0
+                continue
+            fields = line.split("\t", 2)
+            num_or_range = fields[0]
+            w = fields[1]
+
+            if "-" in num_or_range:
+                end_of_multiword = int(num_or_range.split("-")[1])
+                multiword_combined = w
+                multiword_separate = []
+                multiword_sp_after = "SpaceAfter=No" not in fields[-1]
+                continue
+            elif int(num_or_range) <= end_of_multiword:
+                multiword_separate.append(w)
+                if int(num_or_range) == end_of_multiword:
+                    import tokenizations
+                    _, separate_to_combined = tokenizations.get_alignments(
+                        multiword_combined, multiword_separate
+                    )
+                    have_up_to = 0
+                    for i, char_idxs in enumerate(separate_to_combined):
+                        if i == len(multiword_separate) - 1:
+                            word = multiword_combined[have_up_to:]
+                            sent.append((word, multiword_sp_after))
+                        elif char_idxs:
+                            word = multiword_combined[have_up_to : max(char_idxs) + 1]
+                            sent.append((word, False))
+                            have_up_to = max(char_idxs) + 1
+                        else:
+                            sent.append(("", False))
+                    assert int(num_or_range) == len(sent)
+                    end_of_multiword = 0
+                    multiword_combined = ""
+                    multiword_separate = []
+                    multiword_sp_after = False
+                continue
+            else:
+                assert int(num_or_range) == len(sent) + 1
+                sp = "SpaceAfter=No" not in fields[-1]
+                sent.append((w, sp))
+    return sents
+
+
+def load_trees(const_path, text_path=None, text_processing="default"):
+    """Load a treebank.
+
+    The standard tree format presents an abstracted view of the raw text, with the
+    assumption that a tokenizer and other early stages of the NLP pipeline have already
+    been run. These can include formatting changes like escaping certain characters
+    (e.g. -LRB-) or transliteration (see e.g. the Arabic and Hebrew SPMRL datasets).
+    Tokens are not always delimited by whitespace, and the raw whitespace in the source
+    text is thrown away in the PTB tree format. Moreover, in some treebanks the leaves
+    of the trees are lemmas/stems rather than word forms.
+
+    All of this is a mismatch for pre-trained transformer models, which typically do
+    their own tokenization starting with raw unicode strings. A mismatch compared to
+    pre-training often doesn't affect performance if you just want to report F1 scores
+    within the same treebank, but it raises some questions when it comes to releasing a
+    parser for general use: (1) Must the parser be integrated with a tokenizer that
+    matches the treebank convention? In fact, many modern NLP libraries like spaCy train
+    on dependency data that doesn't necessarily use the same tokenization convention as
+    constituency treebanks. (2) Can the parser's pre-trained model be merged with other
+    pre-trained system components (via methods like multi-task learning or adapters), or
+    must it remain its own system because of tokenization mismatches?
+
+    This tree-loading function aims to build a path towards parsing from raw text by
+    using the `text_path` argument to specify an auxiliary file that can be used to
+    recover the original unicode string for the text. Parser layers above the
+    pre-trained model may still use gold tokenization during training, but this will
+    possibly help make the parser more robust to tokenization mismatches.
+
+    On the other hand, some benchmarks involve evaluating with gold tokenization, and
+    naively switching to using raw text degrades performance substantially. This can
+    hopefully be addressed by making the parser layers on top of the pre-trained
+    transformers handle tokenization more intelligently, but this is still a work in
+    progress and the option remains to use the data from the tree files with minimal
+    processing controlled by the `text_processing` argument to clean up some escaping or
+    transliteration.
+
+    Args:
+        const_path: Path to the file with one tree per line.
+        text_path: (optional) Path to a file that provides the correct spelling for all
+            tokens (without any escaping, transliteration, or other mangling) and
+            information about whether there is whitespace after each token. Files in the
+            CoNLL-U format (https://universaldependencies.org/format.html) are accepted,
+            but the parser also accepts similarly-formatted files with just three fields
+            (ID, FORM, MISC) instead of the usual ten. Text is recovered from the FORM
+            field and any "SpaceAfter=No" annotations in the MISC field.
+        text_processing: Text processing to use if no text_path is specified:
+            - 'default': undo PTB-style escape sequences and attempt to guess whitespace
+                surrounding punctuation
+            - 'arabic': guess that all tokens are separated by spaces
+            - 'arabic-translit': undo Buckwalter transliteration and guess that all
+                tokens are separated by spaces
+            - 'chinese': keep all tokens unchanged (i.e. do not attempt to find any
+                escape sequences), and assume no whitespace between tokens
+            - 'hebrew': guess that all tokens are separated by spaces
+            - 'hebrew-translit': undo transliteration (see Sima'an et al. 2002) and
+                guess that all tokens are separated by spaces
+
+    Returns:
+        A list of ParsingExample objects, which have the following attributes:
+            - `tree` is an instance of nltk.Tree
+            - `words` is a list of strings
+            - `space_after` is a list of booleans
+    """
+    reader = BracketParseCorpusReader("", [const_path])
+    trees = reader.parsed_sents()
+
+    if text_path is not None:
+        sents = read_text(text_path)
+    elif text_processing in ("arabic-translit", "hebrew-translit"):
+        translit = transliterate.TRANSLITERATIONS[
+            text_processing.replace("-translit", "")
+        ]
+        sents = []
+        for tree in trees:
+            words = [translit(word) for word in tree.leaves()]
+            sp_after = [True for _ in words]
+            sents.append((words, sp_after))
+    elif text_processing in ("arabic", "hebrew"):
+        sents = []
+        for tree in trees:
+            words = tree.leaves()
+            sp_after = [True for _ in words]
+            sents.append((words, sp_after))
+    elif text_processing == "chinese":
+        sents = []
+        for tree in trees:
+            words = tree.leaves()
+            sp_after = [False for _ in words]
+            sents.append((words, sp_after))
+    elif text_processing == "default":
+        sents = []
+        for tree in trees:
+            words = ptb_unescape.ptb_unescape(tree.leaves())
+            sp_after = ptb_unescape.guess_space_after(tree.leaves())
+            sents.append((words, sp_after))
+    else:
+        raise ValueError(f"Bad value for text_processing: {text_processing}")
+
+    assert len(trees) == len(sents)
+    treebank = Treebank(
+        [
+            ParsingExample(tree=tree, words=words, space_after=space_after)
+            for tree, (words, space_after) in zip(trees, sents)
+        ]
+    )
+    for example in treebank:
+        assert len(example.words) == len(example.leaves()), (
+            "Constituency tree has a different number of tokens than the CONLL-U or "
+            "other file used to specify reversible tokenization."
+        )
+    return treebank
+
+def load_text(const_path):
+    """Load the test data from conll file and store each sample in a ParseExample class.
+
+    Example of input file in conll format:
+    Zum     APPRART.Dat.Sg.Neut
+    ersten  ADJA.Sup.Dat.Sg.Neut
+    sollen  VMFIN.1.Pl.Pres.*
+    wir     PPER.1.Nom.Pl.*
+
+    Lieber  ADJA.Pos.Nom.Sg.Masc
+    mensch  NN.Nom.Sg.Masc
+    gedenck VVIMP.2.Sg.Imp
+    und     KON
+
+    Args:
+        const_path: Path to the conll file with one (token, tag) pair each line and each
+        sentence separated by a new line '\n'.
+
+    Returns:
+        A list of ParsingExample objects, which have the following attributes:
+            - `words` is a list of strings
+            - `_pos` is a list of (token, tag) tuples
+            - `space_after` is a list of booleans
+    """
+    with open(const_path, 'r', encoding='utf-8') as f:
+        examples = []
+        tags, tag_pairs, word_tag_pairs = [], [], []
+        for line in f.readlines():
+            if line == '\n':
+                sp_after = ptb_unescape.guess_space_after(tags)
+                examples.append(
+                    ParsingExample(words=tags, space_after=sp_after, _pos=tag_pairs, 
+                    word_tag_pairs=word_tag_pairs))
+                tags, tag_pairs, word_tag_pairs = [], [], []
+            else:
+                word, tag = line.strip().split()
+                tags.append(tag)
+                word_tag_pairs.append((word, tag))
+                tag_pairs.append((tag, tag.split('.')[0]))
+
+    treebank = Treebank(examples)
+
+    return treebank
+
+def load_single_text(words, tags):
+    """Load the test data from conll file and store each sample in a ParseExample class.
+
+    Example of input file in conll format:
+    Zum     APPRART.Dat.Sg.Neut
+    ersten  ADJA.Sup.Dat.Sg.Neut
+    sollen  VMFIN.1.Pl.Pres.*
+    wir     PPER.1.Nom.Pl.*
+
+    Lieber  ADJA.Pos.Nom.Sg.Masc
+    mensch  NN.Nom.Sg.Masc
+    gedenck VVIMP.2.Sg.Imp
+    und     KON
+
+    Args:
+        const_path: Path to the conll file with one (token, tag) pair each line and each
+        sentence separated by a new line '\n'.
+
+    Returns:
+        A list of ParsingExample objects, which have the following attributes:
+            - `words` is a list of strings
+            - `_pos` is a list of (token, tag) tuples
+            - `space_after` is a list of booleans
+    """
+
+    examples = []
+    tag_pairs, word_tag_pairs = [], []
+    sp_after = ptb_unescape.guess_space_after(tags)
+    for word, tag in zip(words, tags):
+        word_tag_pairs.append((word, tag))
+        tag_pairs.append((tag, tag.split('.')[0]))
+        
+    examples.append(
+            ParsingExample(words=tags, space_after=sp_after, _pos=tag_pairs, 
+            word_tag_pairs=word_tag_pairs))
+
+            
+
+    treebank = Treebank(examples)
+
+    return treebank
\ No newline at end of file
diff --git a/parsing/src/treebanks.py~ b/parsing/src/treebanks.py~
new file mode 100644
index 0000000000000000000000000000000000000000..99ab39506859ef58ac75ee5de7cdf0ea7f270b37
--- /dev/null
+++ b/parsing/src/treebanks.py~
@@ -0,0 +1,235 @@
+import dataclasses
+from typing import List, Optional, Tuple
+
+import nltk
+from nltk.corpus.reader.bracket_parse import BracketParseCorpusReader
+import tokenizations
+import torch
+
+from benepar import ptb_unescape
+from benepar.parse_base import BaseInputExample
+import transliterate
+
+
+@dataclasses.dataclass
+class ParsingExample(BaseInputExample):
+    """A single parse tree and sentence."""
+
+    words: List[str]
+    space_after: List[bool]
+    tree: Optional[nltk.Tree] = None
+    _pos: Optional[List[Tuple[str, str]]] = None
+
+    def leaves(self):
+        if self.tree is not None:
+            return self.tree.leaves()
+        elif self._pos is not None:
+            return [word for word, tag in self._pos]
+        else:
+            return None
+
+    def pos(self):
+        if self.tree is not None:
+            return self.tree.pos()
+        else:
+            return self._pos
+
+    def without_gold_annotations(self):
+        return dataclasses.replace(self, tree=None, _pos=self.pos())
+
+
+class Treebank(torch.utils.data.Dataset):
+    def __init__(self, examples):
+        self.examples = examples
+
+    def __len__(self):
+        return len(self.examples)
+
+    def __getitem__(self, index):
+        return self.examples[index]
+
+    @property
+    def trees(self):
+        return [x.tree for x in self.examples]
+
+    @property
+    def sents(self):
+        return [x.words for x in self.examples]
+
+    @property
+    def tagged_sents(self):
+        return [x.pos() for x in self.examples]
+
+    def filter_by_length(self, max_len):
+        return Treebank([x for x in self.examples if len(x.leaves()) <= max_len])
+
+    def without_gold_annotations(self):
+        return Treebank([x.without_gold_annotations() for x in self.examples])
+
+
+def read_text(text_path):
+    sents = []
+    sent = []
+    end_of_multiword = 0
+    multiword_combined = ""
+    multiword_separate = []
+    multiword_sp_after = False
+    with open(text_path) as f:
+        for line in f:
+            if not line.strip() or line.startswith("#"):
+                if sent:
+                    sents.append(([w for w, sp in sent], [sp for w, sp in sent]))
+                    sent = []
+                    assert end_of_multiword == 0
+                continue
+            fields = line.split("\t", 2)
+            num_or_range = fields[0]
+            w = fields[1]
+
+            if "-" in num_or_range:
+                end_of_multiword = int(num_or_range.split("-")[1])
+                multiword_combined = w
+                multiword_separate = []
+                multiword_sp_after = "SpaceAfter=No" not in fields[-1]
+                continue
+            elif int(num_or_range) <= end_of_multiword:
+                multiword_separate.append(w)
+                if int(num_or_range) == end_of_multiword:
+                    _, separate_to_combined = tokenizations.get_alignments(
+                        multiword_combined, multiword_separate
+                    )
+                    have_up_to = 0
+                    for i, char_idxs in enumerate(separate_to_combined):
+                        if i == len(multiword_separate) - 1:
+                            word = multiword_combined[have_up_to:]
+                            sent.append((word, multiword_sp_after))
+                        elif char_idxs:
+                            word = multiword_combined[have_up_to : max(char_idxs) + 1]
+                            sent.append((word, False))
+                            have_up_to = max(char_idxs) + 1
+                        else:
+                            sent.append(("", False))
+                    assert int(num_or_range) == len(sent)
+                    end_of_multiword = 0
+                    multiword_combined = ""
+                    multiword_separate = []
+                    multiword_sp_after = False
+                continue
+            else:
+                assert int(num_or_range) == len(sent) + 1
+                sp = "SpaceAfter=No" not in fields[-1]
+                sent.append((w, sp))
+    return sents
+
+
+def load_trees(const_path, text_path=None, text_processing="default"):
+    """Load a treebank.
+
+    The standard tree format presents an abstracted view of the raw text, with the
+    assumption that a tokenizer and other early stages of the NLP pipeline have already
+    been run. These can include formatting changes like escaping certain characters
+    (e.g. -LRB-) or transliteration (see e.g. the Arabic and Hebrew SPMRL datasets).
+    Tokens are not always delimited by whitespace, and the raw whitespace in the source
+    text is thrown away in the PTB tree format. Moreover, in some treebanks the leaves
+    of the trees are lemmas/stems rather than word forms.
+
+    All of this is a mismatch for pre-trained transformer models, which typically do
+    their own tokenization starting with raw unicode strings. A mismatch compared to
+    pre-training often doesn't affect performance if you just want to report F1 scores
+    within the same treebank, but it raises some questions when it comes to releasing a
+    parser for general use: (1) Must the parser be integrated with a tokenizer that
+    matches the treebank convention? In fact, many modern NLP libraries like spaCy train
+    on dependency data that doesn't necessarily use the same tokenization convention as
+    constituency treebanks. (2) Can the parser's pre-trained model be merged with other
+    pre-trained system components (via methods like multi-task learning or adapters), or
+    must it remain its own system because of tokenization mismatches?
+
+    This tree-loading function aims to build a path towards parsing from raw text by
+    using the `text_path` argument to specify an auxiliary file that can be used to
+    recover the original unicode string for the text. Parser layers above the
+    pre-trained model may still use gold tokenization during training, but this will
+    possibly help make the parser more robust to tokenization mismatches.
+
+    On the other hand, some benchmarks involve evaluating with gold tokenization, and
+    naively switching to using raw text degrades performance substantially. This can
+    hopefully be addressed by making the parser layers on top of the pre-trained
+    transformers handle tokenization more intelligently, but this is still a work in
+    progress and the option remains to use the data from the tree files with minimal
+    processing controlled by the `text_processing` argument to clean up some escaping or
+    transliteration.
+
+    Args:
+        const_path: Path to the file with one tree per line.
+        text_path: (optional) Path to a file that provides the correct spelling for all
+            tokens (without any escaping, transliteration, or other mangling) and
+            information about whether there is whitespace after each token. Files in the
+            CoNLL-U format (https://universaldependencies.org/format.html) are accepted,
+            but the parser also accepts similarly-formatted files with just three fields
+            (ID, FORM, MISC) instead of the usual ten. Text is recovered from the FORM
+            field and any "SpaceAfter=No" annotations in the MISC field.
+        text_processing: Text processing to use if no text_path is specified:
+            - 'default': undo PTB-style escape sequences and attempt to guess whitespace
+                surrounding punctuation
+            - 'arabic': guess that all tokens are separated by spaces
+            - 'arabic-translit': undo Buckwalter transliteration and guess that all
+                tokens are separated by spaces
+            - 'chinese': keep all tokens unchanged (i.e. do not attempt to find any
+                escape sequences), and assume no whitespace between tokens
+            - 'hebrew': guess that all tokens are separated by spaces
+            - 'hebrew-translit': undo transliteration (see Sima'an et al. 2002) and
+                guess that all tokens are separated by spaces
+
+    Returns:
+        A list of ParsingExample objects, which have the following attributes:
+            - `tree` is an instance of nltk.Tree
+            - `words` is a list of strings
+            - `space_after` is a list of booleans
+    """
+    reader = BracketParseCorpusReader("", [const_path])
+    trees = reader.parsed_sents()
+
+    if text_path is not None:
+        sents = read_text(text_path)
+    elif text_processing in ("arabic-translit", "hebrew-translit"):
+        translit = transliterate.TRANSLITERATIONS[
+            text_processing.replace("-translit", "")
+        ]
+        sents = []
+        for tree in trees:
+            words = [translit(word) for word in tree.leaves()]
+            sp_after = [True for _ in words]
+            sents.append((words, sp_after))
+    elif text_processing in ("arabic", "hebrew"):
+        sents = []
+        for tree in trees:
+            words = tree.leaves()
+            sp_after = [True for _ in words]
+            sents.append((words, sp_after))
+    elif text_processing == "chinese":
+        sents = []
+        for tree in trees:
+            words = tree.leaves()
+            sp_after = [False for _ in words]
+            sents.append((words, sp_after))
+    elif text_processing == "default":
+        sents = []
+        for tree in trees:
+            words = ptb_unescape.ptb_unescape(tree.leaves())
+            sp_after = ptb_unescape.guess_space_after(tree.leaves())
+            sents.append((words, sp_after))
+    else:
+        raise ValueError(f"Bad value for text_processing: {text_processing}")
+
+    assert len(trees) == len(sents)
+    treebank = Treebank(
+        [
+            ParsingExample(tree=tree, words=words, space_after=space_after)
+            for tree, (words, space_after) in zip(trees, sents)
+        ]
+    )
+    for example in treebank:
+        assert len(example.words) == len(example.leaves()), (
+            "Constituency tree has a different number of tokens than the CONLL-U or "
+            "other file used to specify reversible tokenization."
+        )
+    return treebank
diff --git a/parsing/src/utils.py b/parsing/src/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cdd5f3c926ae0f18ba5870dd8899331e7f478f5
--- /dev/null
+++ b/parsing/src/utils.py
@@ -0,0 +1,78 @@
+import sys
+from nltk.corpus.reader.bracket_parse import BracketParseCorpusReader
+
+
+"""
+This function replaces the leaves of parse trees stored in a bracketed text file with the leaves from
+a file in CoNLL format.
+It takes the file with the parse trees in text string form and the CoNLL file, which have to correspond
+with each other.
+It creates a new file containing the modified parse trees in text string format.
+"""
+def replace_leaves(parse_path, conll_path, output_path):
+
+    # extract new leaves form the conll file
+    with open(conll_path, 'r', encoding='utf-8') as f_conll:
+        leaves = []
+        current_leaves = []
+        for line in f_conll.readlines():
+            if line == '\n':
+                leaves.append(current_leaves)
+                current_leaves = []
+            else:
+                leaf = line.split()[1].strip()
+                current_leaves.append(leaf)
+    
+    # read the original parse tree
+    reader = BracketParseCorpusReader('', [parse_path])
+    trees = reader.parsed_sents()
+
+    assert len(trees) == len(leaves), "The number of trees and leaves is not matched."
+    with open(output_path, 'w', encoding='utf-8') as f_output:
+        for i, (tree, current_leaves) in enumerate(zip(trees, leaves)):
+            leaf_positions = tree.treepositions('leaves')
+            assert len(leaf_positions) == len(current_leaves), f"The number of leaves is not matched at position {i}:\
+            {len(leaf_positions)} vs {len(current_leaves)} \n{tree.leaves()}\n{current_leaves}"
+            for j, (pos, leaf) in enumerate(zip(leaf_positions, current_leaves)):
+                tree[pos] = leaf
+            f_output.write('{}\n'.format(tree.pformat(margin=1e100)))
+
+def replace_labels(parse_path, conll_path, output_path):
+
+    # extract new leaves form the conll file
+    with open(conll_path, 'r', encoding='utf-8') as f_conll:
+        leaves = []
+        current_leaves = []
+        for line in f_conll.readlines():
+            if line == '\n':
+                leaves.append(current_leaves)
+                current_leaves = []
+            else:
+                leaf = line.split()[1].strip()
+                current_leaves.append(leaf)
+    
+    # read the original parse tree
+    reader = BracketParseCorpusReader('', [parse_path])
+    trees = reader.parsed_sents()
+
+    assert len(trees) == len(leaves), "The number of trees and leaves is not matched."
+    with open(output_path, 'w', encoding='utf-8') as f_output:
+        for i, (tree, current_leaves) in enumerate(zip(trees, leaves)):
+            leaf_positions = tree.treepositions('leaves')
+            assert len(leaf_positions) == len(current_leaves), f"The number of leaves is not matched at position {i}:\
+            {len(leaf_positions)} vs {len(current_leaves)} \n{tree.leaves()}\n{current_leaves}"
+            for j, (pos, leaf) in enumerate(zip(leaf_positions, current_leaves)):
+                tree[pos[:-1]].set_label(leaf)
+            f_output.write('{}\n'.format(tree.pformat(margin=1e100)))
+
+
+"""
+For example:
+cd schmid/MHG-Parser/self-attentive-parser-master
+python src/utils.py data/mhg/MHG.parses data/mhg/MHG.mapped data/mhg/MHG_retag.parses
+"""
+if __name__=='__main__':
+    assert len(sys.argv) == 4, "Wrong number of input file paths"
+    parse_path, conll_path, output_path = sys.argv[1:]
+    # replace_leaves(parse_path, conll_path, output_path)
+    replace_labels(parse_path, conll_path, output_path)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..443ff4ee3320e71c83afee232d76b6c7fa9d41dc
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+accelerate==0.26.1
+huggingface-hub==0.20.2
+numpy==1.23.4
+scipy==1.10.1
+tokenizers==0.15.0
+torch==2.1.2
+transformers==4.36.2
+nltk
+torch-struct
\ No newline at end of file