Using SEMPRE (Semantic Parser with Execution) as a Java library

Getting started

  • Step 1: Get the SEMPRE repository:
    git clone https://github.com/percyliang/sempre
  • Step 2: Navigate to the SEMPRE repository and follow the steps listed under "Easy setup" in README.md
  • Step 3: Follow the steps listed below

You'll need

  • libsempre/sempre-core.jar
  • libsempre/sempre-cache.jar
  • libsempre/sempre-corenlp.jar if using CoreNLPAnalyzer
These are produced by following the steps under Easy setup in the README.md file of the SEMPRE repository.

You'll also need many of the dependencies under the lib directory of the SEMPRE repository.

Make sure the jars are on the classpath when compiling your project. I like using Gradle, so I just threw all the jars in a directory and included the directory as a dependency for my project:

dependencies {
    // Add every *.jar file found in ../lib to this project's
    // `implementation` configuration, so the SEMPRE jars and the
    // dependencies copied there are available when compiling.
    implementation fileTree(dir: '../lib', include: '*.jar')
}

Write yourself a parser class

The hot thing to do here would be to force you to dig through a bunch of poorly written explanations to find the full code listing. It's usually buried in the middle somewhere, disguised to look like another partial chunk of code. I'm not a fan of that. Here's the full code listing for a sample Parser class. We'll explain things later:

Parser.java

import edu.stanford.nlp.sempre.*;
import edu.stanford.nlp.sempre.corenlp.CoreNLPAnalyzer;
import fig.basic.Pair;

import java.util.*;
import java.util.stream.Collectors;

/**
 * Thin wrapper that drives SEMPRE from Java code instead of from its command line.
 *
 * <p>Expected call order: {@link #setGrammarPath}, {@link #setExamplePath},
 * {@link #initialize}, {@link #learn}, and then {@link #parse} as often as needed.
 * {@link #initialize} must be called before learning or parsing.
 */
public class Parser {
    private final Builder builder;
    private final Dataset dataset;
    private final Grammar grammar;
    private final LanguageAnalyzer analyzer;

    /**
     * Creates a parser that uses the given language analyzer.
     *
     * @param analyzer analyzer to install as the SEMPRE-wide singleton
     */
    Parser(LanguageAnalyzer analyzer) {
        this.builder = new Builder();
        this.dataset = new Dataset();
        this.grammar = new Grammar();
        this.analyzer = analyzer;

        // Equivalent command line option: -languageAnalyzer corenlp.CoreNLPAnalyzer
        // if `this.analyzer` is `new CoreNLPAnalyzer()`
        LanguageAnalyzer.setSingleton(this.analyzer);
    }

    /** Creates a parser backed by {@link CoreNLPAnalyzer}. */
    public Parser() {
        this(new CoreNLPAnalyzer());
    }

    /**
     * Reads the grammar file and hands the grammar to the builder.
     * Equivalent command line option: -Grammar.inPaths [grammarPath]
     *
     * @param grammarPath path of the grammar file to read
     */
    public void setGrammarPath(String grammarPath) {
        grammar.read(grammarPath);
        builder.grammar = grammar;
    }

    /**
     * Loads the examples file into the dataset under the "train" group.
     * Equivalent command line option: -Dataset.inPaths train:[examplePath]
     *
     * @param examplePath path of the examples file to read
     */
    public void setExamplePath(String examplePath) {
        dataset.readFromPathPairs(Collections.singletonList(new Pair<>("train", examplePath)));
    }

    /** Lets the builder fill in every component that was not set explicitly. */
    public void initialize() {
        builder.buildUnspecified();
    }

    /** Configures feature extraction and the learner, then trains on the loaded dataset. */
    public void learn() {
        // Equivalent command line option: -FeatureExtractor.featureDomains rule
        // The options live in a static field, so assigning them is all that is needed here.
        FeatureExtractor.Options o = new FeatureExtractor.Options();
        o.featureDomains = Collections.singleton("rule");
        FeatureExtractor.opts = o;

        // Equivalent command line option: -Learner.maxTrainIters 3
        Learner.opts.maxTrainIters = 3;
        Learner learner = new Learner(builder.parser, builder.params, dataset);
        learner.learn();
    }

    /**
     * Parses a single utterance with SEMPRE.
     *
     * @param query utterance to parse
     * @return a response holding the parsed example, with candidate index 0 selected
     */
    public Response parse(String query) {
        Example.Builder b = new Example.Builder();
        b.setId("session:1");
        b.setUtterance(query);
        Example ex = b.createExample();
        Response response = new Response(builder);

        ex.preprocess();

        // Parse!
        builder.parser.parse(builder.params, ex, false);
        response.ex = ex;
        response.candidateIndex = 0;

        return response;
    }
}

If you're using the sample class above, you'll also want to copy the Response class from within edu.stanford.nlp.sempre.Master. It's reproduced here:
Response.java

import edu.stanford.nlp.sempre.Builder;
import edu.stanford.nlp.sempre.Derivation;
import edu.stanford.nlp.sempre.Example;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Copied from edu.stanford.nlp.sempre.Master
/**
 * Result of parsing one utterance: the parsed example plus the index of the
 * derivation currently selected for display.
 *
 * <p>Copied from edu.stanford.nlp.sempre.Master.
 */
public class Response {
    // Example that was parsed, if any.
    public Example ex;
    private final Builder builder;

    // Which derivation we're selecting to show (-1 means none selected)
    int candidateIndex = -1;

    // Detailed information
    public Map<String, Object> stats = new LinkedHashMap<>();
    public List<String> lines = new ArrayList<>();

    /**
     * Creates a response bound to the builder whose executor is used to
     * evaluate derivations.
     *
     * @param b builder supplying the executor
     */
    public Response(Builder b) {
        this.builder = b;
    }

    /**
     * Describes the selected derivation as {@code formula => value}.
     *
     * @return "(no answer)" when the example has no predicted derivations,
     *         "(not selected)" when no candidate is selected, otherwise the
     *         formula and value of the selected derivation
     */
    public String getFormulaAnswer() {
        if (ex.getPredDerivations().isEmpty()) {
            return "(no answer)";
        } else if (candidateIndex == -1) {
            return "(not selected)";
        } else {
            Derivation deriv = getDerivation();
            return deriv.getFormula() + " => " + deriv.getValue();
        }
    }

    /**
     * Returns the value of the selected derivation as a string, executing the
     * derivation first if needed.
     *
     * @return "(no answer)" when the example has no predicted derivations,
     *         "(not selected)" when no candidate is selected, otherwise the
     *         string form of the selected derivation's value
     */
    public String getAnswer() {
        if (ex.getPredDerivations().isEmpty()) {
            return "(no answer)";
        } else if (candidateIndex == -1) {
            return "(not selected)";
        } else {
            Derivation deriv = getDerivation();
            deriv.ensureExecuted(builder.executor, ex.context);
            return deriv.getValue().toString();
        }
    }

    public List<String> getLines() { return lines; }
    public Example getExample() { return ex; }
    public int getCandidateIndex() { return candidateIndex; }

    /**
     * Returns the predicted derivation at the selected candidate index.
     * Callers must make sure a candidate is selected and exists.
     */
    public Derivation getDerivation() {
        return ex.getPredDerivations().get(candidateIndex);
    }
}

Usage

ParserTest.java

import edu.stanford.nlp.sempre.SimpleAnalyzer;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.*;

/** End-to-end usage example: load a grammar and examples, learn, then parse two queries. */
public class ParserTest {
    @Test
    public void testParser() {
        // We can use SimpleAnalyzer instead of CoreNLPAnalyzer. SimpleAnalyzer is the
        // default when you run the `run` script in SEMPRE; CoreNLPAnalyzer is the
        // default for the sample Parser class.
        Parser parser = new Parser(new SimpleAnalyzer());

        // Load grammar
        parser.setGrammarPath("arithmetic-tutorial.grammar");

        // Load training examples
        parser.setExamplePath("arithmetic-tutorial.examples");

        // Must call initialize before learning or parsing
        parser.initialize();

        // Learn from training examples
        parser.learn();

        // Unambiguous query (two plus four means 2 + 4, which is 6, and we expect only 1 prediction)
        Response unambiguous = parser.parse("two plus four");
        assertEquals("(number 6)", unambiguous.getAnswer());
        assertEquals(1, unambiguous.ex.getPredDerivations().size());

        // Ambiguous parse (two and five could mean 2 + 5 or 2 * 5, so we expect 2 predictions)
        // A separate variable is required: re-declaring `resp` in the same scope does not compile.
        Response ambiguous = parser.parse("two and five");
        assertEquals(2, ambiguous.ex.getPredDerivations().size());
    }
}

No comments:

Post a Comment