PDFの表データをWebAPI化する話

### PDFからテキストを抽出するサンプル

```java
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.text.PDFTextStripper;
import java.io.*;

/**
 * Command-line tool that extracts the plain text of a PDF file and writes it
 * to a sibling file named {@code <input path>.txt}.
 *
 * <p>The text is extracted in batches of pages so that the text of the whole
 * document never has to be held in memory as a single string.
 */
public class PDF2TextConverter {
    /** Number of pages extracted per call to {@code PDFTextStripper.getText}. */
    private static final int PAGES_PER_BATCH = 100;

    /**
     * Extracts the text of the PDF given as the first argument.
     *
     * @param args {@code args[0]} is the path of the PDF file to convert
     * @throws IllegalArgumentException if no input path was supplied
     * @throws IOException if the PDF cannot be loaded, text extraction is not
     *         permitted by the document, or the output file cannot be written
     */
    public static void main(String... args) throws IOException {
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: java PDF2TextConverter <pdf-file>");
        }
        String filePath = args[0];

        // Load the PDF. try-with-resources guarantees the document is closed
        // on every exit path, including the permission failure below.
        try (PDDocument document = PDDocument.load(new File(filePath))) {
            // Fail if the document does not allow its content to be extracted.
            AccessPermission permission = document.getCurrentAccessPermission();
            if (!permission.canExtractContent()) {
                throw new IOException("You do not have permission to extract text.");
            }

            // Prepare the extractor; sorting by position is enabled before any text is read.
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true);

            int pageCount = document.getNumberOfPages();
            File outputFile = new File(filePath + ".txt");

            // NOTE(review): the writer uses the platform default charset, as the
            // original did; confirm whether consumers expect a fixed encoding.
            try (FileOutputStream out = new FileOutputStream(outputFile);
                 PrintWriter writer = new PrintWriter(out)) {
                // Extract the text one batch of pages at a time and append each
                // trimmed batch to the output file, followed by a line break.
                for (int first = 1; first <= pageCount; first += PAGES_PER_BATCH) {
                    stripper.setStartPage(first);
                    stripper.setEndPage(Math.min(first + PAGES_PER_BATCH - 1, pageCount));
                    writer.println(stripper.getText(document).trim());
                }

                // PrintWriter swallows I/O errors; surface them instead of
                // silently leaving a truncated output file.
                if (writer.checkError()) {
                    throw new IOException("Failed to write extracted text to " + outputFile);
                }
            }
        }
    }
}
```