Spell-Checking-with-Hunspell

From Qt Wiki
Revision as of 10:14, 11 July 2015 by Yash (talk | contribs)
Jump to navigation Jump to search

Spell Checking with Hunspell

Prerequisites

You have to get Hunspell from http://hunspell.sourceforge.net/ [hunspell.sourceforge.net]. I have tested the classes with version 1.2.8. It may work with a more recent version (current is 1.2.12). Dictionaries can be downloaded via the sourceforge page.

If you plan to use the spell checker in commercial software, watch out for the copyright and license of the dictionaries!

The source code on this wiki page is released by me (Volker, the original author) under the 2-clause BSD license [en.wikipedia.org].

You also must adapt a few files before everything works:

The project file SpellChecker.pro: Adjust the paths to your hunspell installation

dialog.cpp: In the method Dialog::checkSpelling(), adjust the paths to the dictionaries. For the main dictionary you must provide the base name, excluding the suffixes .dic and .aff. The user dictionary is optional; if you use one, give the complete path to the file, including its extension (if it has one).

Sources

SpellChecker.pro

 QT       += core gui
# Qt 5 moved QApplication, QDialog and the other widget classes into the
# separate "widgets" module. Qt 4 has no such module, hence the version guard.
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets

TARGET = SpellChecker
TEMPLATE = app

SOURCES += main.cpp dialog.cpp spellchecker.cpp spellcheckdialog.cpp
HEADERS += dialog.h spellchecker.h spellcheckdialog.h
FORMS += dialog.ui spellcheckdialog.ui

# Adjust the paths below to your Hunspell installation.
unix {
    INCLUDEPATH += /path/to/hunspell/include
    LIBS += /path/to/hunspell/lib/libhunspell-1.2.a
}

win32 {
    INCLUDEPATH += C:/path/to/hunspell/include
    LIBS += C:/path/to/hunspell/Release/hunspell.lib
}

main.cpp

#include <QApplication>
#include "dialog.h"

int main(int argc, char *argv[])
{
    QApplication a(argc, argv);
    Dialog w;
    w.show();

    return a.exec();
}

dialog.ui

 <?xml version="1.0" encoding="UTF-8"?>
 <!--
   Qt Designer form for the class "Dialog": a QTextEdit ("textEdit") spanning
   both columns of the top grid row, and below it a horizontal spacer followed
   by the QPushButton "buttonCheckSpelling" in the right-hand column.
 -->
 <ui version="4.0">
  <class>Dialog</class>
  <widget class="QDialog" name="Dialog">
  <property name="geometry">
   <rect>
    <x>750</x>
    <y>430</y>
    <width>400</width>
    <height>300</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Dialog</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
   <item row="0" column="0" colspan="2">
    <widget class="QTextEdit" name="textEdit"/>
   </item>
   <item row="1" column="0">
    <spacer name="horizontalSpacer">
     <property name="orientation">
      <enum>Qt::Horizontal</enum>
     </property>
     <property name="sizeHint" stdset="0">
      <size>
       <width>239</width>
       <height>20</height>
      </size>
     </property>
    </spacer>
   </item>
   <item row="1" column="1">
    <widget class="QPushButton" name="buttonCheckSpelling">
     <property name="text">
      <string>check spelling</string>
     </property>
    </widget>
   </item>
  </layout>
 </widget>
 <layoutdefault spacing="6" margin="11"/>
 <resources/>
 <connections/>
</ui>

dialog.h

#ifndef DIALOG_H
#define DIALOG_H

#include <QDialog>

// Forward declaration of the form class used through the `ui` pointer below;
// its definition comes from ui_dialog.h, which dialog.cpp includes.
namespace Ui {
    class Dialog;
}

// Main window of the demo: hosts a text edit and a "check spelling" button
// whose click starts an interactive spell check of the entered text.
class Dialog : public QDialog
{
    Q_OBJECT

public:
    // Creates the dialog; `parent` is passed on to QDialog.
    explicit Dialog(QWidget *parent = 0);
    ~Dialog();

protected slots:
    // Walks through the text edit's document and asks the user what to do
    // with every word the spell checker does not know.
    void checkSpelling();

private:
    // Form object; created in the constructor and deleted in the destructor.
    Ui::Dialog *ui;
};

#endif // DIALOG_H


dialog.cpp

#include "dialog.h"
#include "ui_dialog.h"
#include "spellchecker.h"
#include "spellcheckdialog.h"

#include <QHash>
#include <QMessageBox>

// Builds the form on this dialog and connects the "check spelling" button's
// clicked() signal to the checkSpelling() slot.
Dialog::Dialog(QWidget *parent) :
    QDialog(parent),
    ui(new Ui::Dialog)
{
    ui->setupUi(this);
    connect(ui->buttonCheckSpelling, SIGNAL(clicked()), this, SLOT(checkSpelling()));
}


// Deletes the form object that was allocated with new in the constructor.
Dialog::~Dialog()
{
    delete ui;
}


void Dialog::checkSpelling()
{
    QString dictPath = "/path/to/your/dictionary/de_DE_neu/de_DE_neu";
    QString userDict= "/tmp/userDict.txt";
    SpellChecker *spellChecker = new SpellChecker(dictPath, userDict);
    SpellCheckDialog *checkDialog = new SpellCheckDialog(spellChecker, this);

    QTextCharFormat highlightFormat;
    highlightFormat.setBackground(QBrush(QColor("#ff6060")));
    highlightFormat.setForeground(QBrush(QColor("#000000")));
    // alternative format
    //highlightFormat.setUnderlineColor(QColor("red"));
    //highlightFormat.setUnderlineStyle(QTextCharFormat::SpellCheckUnderline);

    // save the position of the current cursor
    QTextCursor oldCursor = ui->textEdit->textCursor();

    // create a new cursor to walk through the text
    QTextCursor cursor(ui->textEdit->document());

    // Don't call cursor.beginEditBlock(), as this prevents the rewdraw after changes to the content
    // cursor.beginEditBlock();
    while(!cursor.atEnd()) {
        QCoreApplication::processEvents();
        cursor.movePosition(QTextCursor::EndOfWord, QTextCursor::KeepAnchor, 1);
        QString word = cursor.selectedText();

        // Workaround for better recognition of words
        // punctuation etc. does not belong to words
        while(!word.isEmpty() && !word.at(0).isLetter() && cursor.anchor() < cursor.position()) {
            int cursorPos = cursor.position();
            cursor.setPosition(cursor.anchor() + 1, QTextCursor::MoveAnchor);
            cursor.setPosition(cursorPos, QTextCursor::KeepAnchor);
            word = cursor.selectedText();
        }

        if(!word.isEmpty() && !spellChecker->spell(word)) {
            QTextCursor tmpCursor(cursor);
            tmpCursor.setPosition(cursor.anchor());
            ui->textEdit->setTextCursor(tmpCursor);
            ui->textEdit->ensureCursorVisible();

            // highlight the unknown word
            QTextEdit::ExtraSelection es;
            es.cursor = cursor;
            es.format = highlightFormat;

            QList<QTextEdit::ExtraSelection> esList;
            esList << es;
            ui->textEdit->setExtraSelections(esList);
            QCoreApplication::processEvents();

            // ask the user what to do
            SpellCheckDialog::SpellCheckAction spellResult = checkDialog->checkWord(word);

            // reset the word highlight
            esList.clear();
            ui->textEdit->setExtraSelections(esList);
            QCoreApplication::processEvents();

            if(spellResult == SpellCheckDialog::AbortCheck)
                break;

            switch(spellResult) {
                case SpellCheckDialog::ReplaceOnce:
                    cursor.insertText(checkDialog->replacement());
                    break;

                default:
                    break;
            }
            QCoreApplication::processEvents();
        }
        cursor.movePosition(QTextCursor::NextWord, QTextCursor::MoveAnchor, 1);
    }
    //cursor.endEditBlock();
    ui->textEdit->setTextCursor(oldCursor);

    QMessageBox::information(
                this,
                tr("Finished"),
                tr("The spell check has finished."));
}


spellcheckdialog.ui

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Qt Designer form for the class "SpellCheckDialog": shows the unknown word,
  a "replace with" line edit, a list of suggestions and the action buttons.
  Literal "<" characters in texts must be written as &lt; to keep the file
  well-formed XML.
-->
<ui version="4.0">
 <class>SpellCheckDialog</class>
 <widget class="QDialog" name="SpellCheckDialog">
  <property name="geometry">
   <rect>
    <x>600</x>
    <y>326</y>
    <width>701</width>
    <height>509</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Spell Checker</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
   <item row="0" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>unknown word:</string>
     </property>
    </widget>
   </item>
   <item row="0" column="1">
    <widget class="QLabel" name="lblUnknownWord">
     <property name="sizePolicy">
      <sizepolicy hsizetype="Expanding" vsizetype="Preferred">
       <horstretch>0</horstretch>
       <verstretch>0</verstretch>
      </sizepolicy>
     </property>
     <property name="text">
      <string>TextLabel</string>
     </property>
    </widget>
   </item>
   <item row="0" column="2" colspan="2">
    <widget class="QPushButton" name="btnAddToDict">
     <property name="text">
      <string>&lt;&lt; add to dictionary</string>
     </property>
    </widget>
   </item>
   <item row="1" column="0">
    <widget class="QLabel" name="label_2">
     <property name="text">
      <string>replace with:</string>
     </property>
    </widget>
   </item>
   <item row="1" column="1" colspan="2">
    <widget class="QLineEdit" name="ledtReplaceWith"/>
   </item>
   <item row="1" column="3" rowspan="2">
    <layout class="QVBoxLayout" name="verticalLayout">
     <item>
      <widget class="QPushButton" name="btnReplaceOnce">
       <property name="text">
        <string>replace once</string>
       </property>
      </widget>
     </item>
     <item>
      <widget class="QPushButton" name="btnReplaceAll">
       <property name="text">
        <string>replace all</string>
       </property>
      </widget>
     </item>
     <item>
      <spacer name="verticalSpacer_2">
       <property name="orientation">
        <enum>Qt::Vertical</enum>
       </property>
       <property name="sizeType">
        <enum>QSizePolicy::Fixed</enum>
       </property>
       <property name="sizeHint" stdset="0">
        <size>
         <width>20</width>
         <height>13</height>
        </size>
       </property>
      </spacer>
     </item>
     <item>
      <widget class="QPushButton" name="btnIgnoreOnce">
       <property name="text">
        <string>ignore once</string>
       </property>
      </widget>
     </item>
     <item>
      <widget class="QPushButton" name="btnIgnoreAll">
       <property name="text">
        <string>ignore all</string>
       </property>
      </widget>
     </item>
     <item>
      <spacer name="verticalSpacer">
       <property name="orientation">
        <enum>Qt::Vertical</enum>
       </property>
       <property name="sizeHint" stdset="0">
        <size>
         <width>20</width>
         <height>240</height>
        </size>
       </property>
      </spacer>
     </item>
     <item>
      <widget class="QPushButton" name="btnCancel">
       <property name="text">
        <string>cancel</string>
       </property>
      </widget>
     </item>
    </layout>
   </item>
   <item row="2" column="0" colspan="3">
    <widget class="QListWidget" name="listWidget">
     <property name="editTriggers">
      <set>QAbstractItemView::NoEditTriggers</set>
     </property>
     <property name="showDropIndicator" stdset="0">
      <bool>false</bool>
     </property>
    </widget>
   </item>
  </layout>
 </widget>
 <resources/>
 <connections/>
</ui>


spellcheckdialog.h

#ifndef SPELLCHECKDIALOG_H
#define SPELLCHECKDIALOG_H

#include <QDialog>

// Only a pointer to the spell checker is stored here, so a forward
// declaration is enough.
class SpellChecker;

// Forward declaration of the form class used through the `ui` pointer below;
// its definition comes from ui_spellcheckdialog.h.
namespace Ui {
    class SpellCheckDialog;
}

// Modal dialog that presents one unknown word together with the spell
// checker's suggestions and lets the user decide what to do with it.
class SpellCheckDialog : public QDialog
{
    Q_OBJECT

public:
    // The action chosen by the user; returned by checkWord().
    enum SpellCheckAction {AbortCheck, IgnoreOnce, IgnoreAll, ReplaceOnce, ReplaceAll, AddToDict};

    // `spellChecker` is stored as a plain pointer and is not deleted by this
    // class; it must stay valid for as long as the dialog is used.
    explicit SpellCheckDialog(SpellChecker *spellChecker, QWidget *parent = 0);
    ~SpellCheckDialog();

    // Current text of the "replace with" line edit.
    QString replacement() const;

public slots:
    // Shows the dialog modally for `word` and returns the chosen action.
    // AbortCheck is returned when the dialog ends without one of the action
    // slots below having run (for example via the cancel button).
    SpellCheckAction checkWord(const QString &word);

protected slots:
    // Each slot records its action as the result of checkWord() and accepts
    // the dialog; ignoreAll() and addToDict() also update the spell checker.
    void ignoreOnce();
    void ignoreAll();
    void replaceOnce();
    void replaceAll();
    void addToDict();

private:
    Ui::SpellCheckDialog *ui;       // form object; deleted in the destructor
    SpellChecker *_spellChecker;    // supplied by the creator of the dialog
    QString _unkownWord;            // word passed to the latest checkWord() call
    SpellCheckAction _returnCode;   // result handed back by checkWord()
};

#endif // SPELLCHECKDIALOG_H


spellcheckdialog.cpp

#include "spellcheckdialog.h"
#include "ui_spellcheckdialog.h"

#include "spellchecker.h"

// Builds the form, remembers the spell checker used for suggestions and
// dictionary updates, and wires every widget to its handler.
SpellCheckDialog::SpellCheckDialog(SpellChecker *spellChecker, QWidget *parent)
    : QDialog(parent)
    , ui(new Ui::SpellCheckDialog)
    , _spellChecker(spellChecker)
{
    ui->setupUi(this);

    // Changing the current suggestion copies its text into the
    // "replace with" line edit.
    connect(ui->listWidget, SIGNAL(currentTextChanged(QString)),
            ui->ledtReplaceWith, SLOT(setText(QString)));

    // One slot per action button; "cancel" simply rejects the dialog.
    connect(ui->btnAddToDict,   SIGNAL(clicked()), this, SLOT(addToDict()));
    connect(ui->btnReplaceOnce, SIGNAL(clicked()), this, SLOT(replaceOnce()));
    connect(ui->btnReplaceAll,  SIGNAL(clicked()), this, SLOT(replaceAll()));
    connect(ui->btnIgnoreOnce,  SIGNAL(clicked()), this, SLOT(ignoreOnce()));
    connect(ui->btnIgnoreAll,   SIGNAL(clicked()), this, SLOT(ignoreAll()));
    connect(ui->btnCancel,      SIGNAL(clicked()), this, SLOT(reject()));
}


// Deletes the form object allocated in the constructor. The spell checker
// pointer is left untouched.
SpellCheckDialog::~SpellCheckDialog()
{
    delete ui;
}


// Presents `word` to the user and blocks until the dialog is closed.
// Returns the action recorded by the slot that closed the dialog, or
// AbortCheck when no action slot ran.
SpellCheckDialog::SpellCheckAction SpellCheckDialog::checkWord(const QString &word)
{
    _unkownWord = word;

    // Show the unknown word in bold and start with an empty replacement.
    const QString boldWord = QString("<b>%1</b>").arg(_unkownWord);
    ui->lblUnknownWord->setText(boldWord);
    ui->ledtReplaceWith->clear();

    // Refill the suggestion list and make the first entry, if any, current.
    const QStringList proposals = _spellChecker->suggest(word);
    ui->listWidget->clear();
    ui->listWidget->addItems(proposals);
    if(!proposals.isEmpty())
        ui->listWidget->setCurrentRow(0, QItemSelectionModel::Select);

    // Stays in effect unless one of the action slots overwrites it.
    _returnCode = AbortCheck;
    QDialog::exec();

    return _returnCode;
}


// Returns the current text of the "replace with" line edit.
QString SpellCheckDialog::replacement() const
{
    return ui->ledtReplaceWith->text();
}


// Slot for the "ignore once" button: records IgnoreOnce as the result of
// checkWord() and closes the dialog.
void SpellCheckDialog::ignoreOnce()
{
    _returnCode = IgnoreOnce;
    accept();
}


void SpellCheckDialog::ignoreAll()
{
    _spellChecker->ignoreWord(_unkownWord);
    _returnCode = IgnoreAll;
    accept();
}


// Slot for the "replace once" button: records ReplaceOnce as the result of
// checkWord() and closes the dialog. The caller reads the new text through
// replacement().
void SpellCheckDialog::replaceOnce()
{
    _returnCode = ReplaceOnce;
    accept();
}


// Slot for the "replace all" button: records ReplaceAll as the result of
// checkWord() and closes the dialog. The dialog itself replaces nothing;
// acting on the result is left to the caller of checkWord().
void SpellCheckDialog::replaceAll()
{
    _returnCode = ReplaceAll;
    accept();
}


void SpellCheckDialog::addToDict()
{
    _spellChecker->addToUserWordlist(_unkownWord);
    _returnCode = AddToDict;
    accept();
}


spellchecker.h

#ifndef SPELLCHECKER_H
#define SPELLCHECKER_H

#include <QString>

class Hunspell;

class SpellChecker
{
public:
    SpellChecker(const QString &dictionaryPath, const QString &userDictionary);
    ~SpellChecker();

    bool spell(const QString &word);
    QStringList suggest(const QString &word);
    void ignoreWord(const QString &word);
    void addToUserWordlist(const QString &word);

private:
    void put_word(const QString &word);
    Hunspell *_hunspell;
    QString _userDictionary;
    QString _encoding;
    QTextCodec *_codec;
};

#endif // SPELLCHECKER_H

spellchecker.cpp

#include "spellchecker.h"

#include <QFile>
#include <QTextStream>
#include <QTextCodec>
#include <QStringList>
#include <QDebug>

#include "hunspell/hunspell.hxx"

// Creates the Hunspell instance for the dictionary `dictionaryPath` (base
// name without the .dic/.aff suffix), determines the dictionary encoding from
// the SET option of the affix file and loads the words of the optional user
// dictionary file `userDictionary`.
SpellChecker::SpellChecker(const QString &dictionaryPath, const QString &userDictionary)
{
    _userDictionary = userDictionary;

    QString dictFile = dictionaryPath + ".dic";
    QString affixFile = dictionaryPath + ".aff";
    QByteArray dictFilePathBA = dictFile.toLocal8Bit();
    QByteArray affixFilePathBA = affixFile.toLocal8Bit();
    _hunspell = new Hunspell(affixFilePathBA.constData(), dictFilePathBA.constData());

    // detect encoding analyzing the SET option in the affix file
    _encoding = "ISO8859-1";
    QFile _affixFile(affixFile);
    if (_affixFile.open(QIODevice::ReadOnly)) {
        QTextStream stream(&_affixFile);
        QRegExp enc_detector("^\\s*SET\\s+([A-Z0-9\\-]+)\\s*", Qt::CaseInsensitive);
        // Read up to the end of the file. Stopping at the first empty line
        // would miss a SET option that follows a blank line.
        while (!stream.atEnd()) {
            const QString line = stream.readLine();
            if (enc_detector.indexIn(line) > -1) {
                _encoding = enc_detector.cap(1);
                qDebug() << QString("Encoding set to ") + _encoding;
                break;
            }
        }
        _affixFile.close();
    }
    _codec = QTextCodec::codecForName(this->_encoding.toLatin1().constData());
    if (!_codec) {
        // codecForName() returns 0 for names it does not know. spell() and
        // suggest() dereference _codec, so make sure it is always valid.
        qWarning() << "No codec found for dictionary encoding" << _encoding
                   << "- falling back to ISO8859-1";
        _encoding = "ISO8859-1";
        _codec = QTextCodec::codecForName("ISO-8859-1");
    }
    if (!_codec)
        _codec = QTextCodec::codecForLocale();

    if(!_userDictionary.isEmpty()) {
        QFile userDictonaryFile(_userDictionary);
        if(userDictonaryFile.open(QIODevice::ReadOnly)) {
            QTextStream stream(&userDictonaryFile);
            // Skip blank lines instead of treating the first one as the end
            // of the word list.
            while (!stream.atEnd()) {
                const QString word = stream.readLine();
                if (!word.isEmpty())
                    put_word(word);
            }
            userDictonaryFile.close();
        } else {
            qWarning() << "User dictionary in " << _userDictionary << "could not be opened";
        }
    } else {
        qDebug() << "User dictionary not set.";
    }
}


// Deletes the Hunspell instance created in the constructor.
SpellChecker::~SpellChecker()
{
    delete _hunspell;
}


bool SpellChecker::spell(const QString &word)
{
    // Encode from Unicode to the encoding used by current dictionary
    return _hunspell->spell(_codec->fromUnicode(word).constData()) != 0;
}


// Returns Hunspell's replacement suggestions for `word`, converted from the
// dictionary encoding to Unicode. The list is empty when there are none.
QStringList SpellChecker::suggest(const QString &word)
{
    char **suggestWordList = 0;

    // Encode from Unicode to the encoding used by current dictionary
    int numSuggestions = _hunspell->suggest(&suggestWordList, _codec->fromUnicode(word).constData());
    QStringList suggestions;
    for(int i=0; i < numSuggestions; ++i)
        suggestions << _codec->toUnicode(suggestWordList[i]);

    // free_list() releases the individual strings and the array holding
    // them. Freeing only the strings leaked the array on every call.
    _hunspell->free_list(&suggestWordList, numSuggestions);
    return suggestions;
}


// Adds `word` to the running Hunspell instance through put_word(). Nothing is
// written to the user dictionary file, so the word is presumably only known
// for the lifetime of this object.
void SpellChecker::ignoreWord(const QString &word)
{
    put_word(word);
}


void SpellChecker::put_word(const QString &word)
{
    _hunspell->add(_codec->fromUnicode(word).constData());
}


void SpellChecker::addToUserWordlist(const QString &word)
{
    put_word(word);
    if(!_userDictionary.isEmpty()) {
        QFile userDictonaryFile(_userDictionary);
        if(userDictonaryFile.open(QIODevice::Append)) {
            QTextStream stream(&userDictonaryFile);
            stream << word << "\n";
            userDictonaryFile.close();
        } else {
            qWarning() << "User dictionary in " << _userDictionary << "could not be opened for appending a new word";
        }
    } else {
        qDebug() << "User dictionary not set.";
    }
}


BSD LICENSE 2011, Volker Götz

The license was removed from the source to make the code more readable. It has been added at the bottom.