% Chapter in the edited volume "Linguistics in the Netherlands 2019".
% Typed as incollection: the chapter authors differ from the volume editors,
% and the entry relies on the booktitle and editor fields.
% The particle "van der" is entered as the von part of the last author's surname.
% NOTE(review): address holds a country name; BibTeX expects the publisher's
% city here -- confirm the city and replace.
@incollection{b968e2f9bde847ecbbe2fe4f4ed059f0,
  author    = {Kroon, Martin and Barbiers, Sjef and Odijk, Jan and van der Pas, St{\'e}fanie},
  title     = {A filter for syntactically incomparable parallel sentences},
  editor    = {Berns, Janine and Tribushinina, Elena},
  booktitle = {Linguistics in the Netherlands 2019},
  series    = {AVT Publications},
  publisher = {John Benjamins},
  address   = {Netherlands},
  year      = {2019},
  month     = dec,
  pages     = {147--161},
  language  = {English},
  abstract  = {Massive automatic comparison of languages in parallel corpora will greatly speed up and enhance comparative syntactic research. Automatically extracting and mining syntactic differences from parallel corpora requires a pre-processing step that filters out sentence pairs that cannot be compared syntactically, for example because they involve ``free'' translations. In this paper we explore four possible filters: the Damerau-Levenshtein distance between POS-tags, the sentence-length ratio, the graph-edit distance between dependency parses, and a combination of the three in a logistic regression model. Results suggest that the dependency-parse filter is the most stable throughout language pairs, while the combination filter achieves the best results.},
}