Spell-Checking-with-Hunspell: Difference between revisions
AutoSpider (talk | contribs) (Decode HTML entity numbers) |
|||
Line 112: | Line 112: | ||
<code> | <code> | ||
#ifndef DIALOG_H | #ifndef DIALOG_H | ||
#define DIALOG_H | #define DIALOG_H | ||
Line 167: | Line 144: | ||
<code> | <code> | ||
#include "dialog.h" | #include "dialog.h" | ||
#include "ui_dialog.h" | #include "ui_dialog.h" | ||
Line 352: | Line 249: | ||
tr("The spell check has finished.")); | tr("The spell check has finished.")); | ||
} | } | ||
</code> | |||
Line 498: | Line 395: | ||
===spellcheckdialog.=== | ===spellcheckdialog.h=== | ||
<code> | <code> | ||
#ifndef SPELLCHECKDIALOG_H | #ifndef SPELLCHECKDIALOG_H | ||
Line 573: | Line 446: | ||
<code> | <code> | ||
#include "spellcheckdialog.h" | #include "spellcheckdialog.h" | ||
Line 694: | Line 544: | ||
<code> | <code> | ||
#ifndef SPELLCHECKER_H | #ifndef SPELLCHECKER_H | ||
#define SPELLCHECKER_H | #define SPELLCHECKER_H | ||
Line 751: | Line 576: | ||
<code> | <code> | ||
#include "spellchecker.h" | #include "spellchecker.h" | ||
Line 886: | Line 687: | ||
} | } | ||
</code> | </code> | ||
BSD LICENSE 2011, Volker Götz | |||
License removed from source to make code more readable.Added at bottom. |
Revision as of 10:13, 11 July 2015
This article may require cleanup to meet the Qt Wiki's quality standards. Reason: Auto-imported from ExpressionEngine. Please improve this article if you can. Remove the {{cleanup}} tag and add this page to Updated pages list after it's clean. |
Spell Checking with Hunspell
Prerequisites
You have to get Hunspell from http://hunspell.sourceforge.net/ [hunspell.sourceforge.net]. I have tested the classes with version 1.2.8. It may work with a more recent version (current is 1.2.12). Dictionaries can be downloaded via the sourceforge page.
If you plan to use the spell checker in commercial software, watch out for the copyright and license of the dictionaries!
The source code on this wiki page is released by me (Volker, being the original author) under the 2 clause BSD license [en.wikipedia.org]
You also must adapt a few files before everything works:
The project file SpellChecker.pro: Adjust the paths to your hunspell installation
dialog.cpp: In the method Dialog::checkSpelling(), adjust the paths to the dictionaries. For the main dictionary you must provide the base name, excluding the suffixes .dic and .aff. The user dictionary is optional; if you use one, give the complete path to the file, including its extension if it has one.
Sources
SpellChecker.pro
# qmake project file for the SpellChecker example application.
# Qt modules the project links against.
# NOTE(review): the sources include <QApplication>/<QDialog>; with Qt 5 these
# live in the separate "widgets" module - confirm whether "QT += widgets" is
# needed for your Qt version.
QT += core gui
# Name of the produced binary; the project is built as an application.
TARGET = SpellChecker
TEMPLATE = app
# Translation units, headers and Designer forms that make up the example.
SOURCES += main.cpp dialog.cpp spellchecker.cpp spellcheckdialog.cpp
HEADERS += dialog.h spellchecker.h spellcheckdialog.h
FORMS += dialog.ui spellcheckdialog.ui
# Placeholder paths: adjust both scopes to your local Hunspell installation.
unix {
INCLUDEPATH += /path/to/hunspell/include
LIBS += /path/to/hunspell/lib/libhunspell-1.2.a
}
win32 {
INCLUDEPATH += C:/path/to/hunspell/include
LIBS += C:/path/to/hunspell/Release/hunspell.lib
}
main.cpp
#include <QApplication>
#include "dialog.h"
int main(int argc, char *argv[])
{
QApplication a(argc, argv);
Dialog w;
w.show();
return a.exec();
}
dialog.ui
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class>
<widget class="QDialog" name="Dialog">
<property name="geometry">
<rect>
<x>750</x>
<y>430</y>
<width>400</width>
<height>300</height>
</rect>
</property>
<property name="windowTitle">
<string>Dialog</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0" colspan="2">
<widget class="QTextEdit" name="textEdit"/>
</item>
<item row="1" column="0">
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>239</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item row="1" column="1">
<widget class="QPushButton" name="buttonCheckSpelling">
<property name="text">
<string>check spelling</string>
</property>
</widget>
</item>
</layout>
</widget>
<layoutdefault spacing="6" margin="11"/>
<resources/>
<connections/>
</ui>
dialog.h
#ifndef DIALOG_H
#define DIALOG_H
#include <QDialog>
namespace Ui {
class Dialog;
}
// Main window of the example: a dialog holding a text edit and a
// "check spelling" button (see dialog.ui).
class Dialog : public QDialog
{
    Q_OBJECT

public:
    // Builds the UI from the generated Ui::Dialog class.
    explicit Dialog(QWidget *parent = 0);
    // Releases the generated UI object.
    ~Dialog();

protected slots:
    // Walks through the text of the text edit and lets the user resolve
    // every word the spell checker does not know.
    void checkSpelling();

private:
    // Generated UI object, created in the constructor and deleted in the
    // destructor.
    Ui::Dialog *ui;
};
#endif // DIALOG_H
dialog.cpp
#include "dialog.h"
#include "ui_dialog.h"
#include "spellchecker.h"
#include "spellcheckdialog.h"
#include <QHash>
#include <QMessageBox>
// Creates the dialog, instantiates the Designer-generated UI and wires
// the "check spelling" button to checkSpelling().
Dialog::Dialog(QWidget *parent)
    : QDialog(parent)
    , ui(new Ui::Dialog)
{
    ui->setupUi(this);

    // The button only exists after setupUi(), so connect afterwards.
    connect(ui->buttonCheckSpelling, SIGNAL(clicked()),
            this, SLOT(checkSpelling()));
}
// Frees the UI object that was allocated with new in the constructor.
Dialog::~Dialog()
{
    delete ui;
}
void Dialog::checkSpelling()
{
QString dictPath = "/path/to/your/dictionary/de_DE_neu/de_DE_neu";
QString userDict= "/tmp/userDict.txt";
SpellChecker *spellChecker = new SpellChecker(dictPath, userDict);
SpellCheckDialog *checkDialog = new SpellCheckDialog(spellChecker, this);
QTextCharFormat highlightFormat;
highlightFormat.setBackground(QBrush(QColor("#ff6060")));
highlightFormat.setForeground(QBrush(QColor("#000000")));
// alternative format
//highlightFormat.setUnderlineColor(QColor("red"));
//highlightFormat.setUnderlineStyle(QTextCharFormat::SpellCheckUnderline);
// save the position of the current cursor
QTextCursor oldCursor = ui->textEdit->textCursor();
// create a new cursor to walk through the text
QTextCursor cursor(ui->textEdit->document());
// Don't call cursor.beginEditBlock(), as this prevents the rewdraw after changes to the content
// cursor.beginEditBlock();
while(!cursor.atEnd()) {
QCoreApplication::processEvents();
cursor.movePosition(QTextCursor::EndOfWord, QTextCursor::KeepAnchor, 1);
QString word = cursor.selectedText();
// Workaround for better recognition of words
// punctuation etc. does not belong to words
while(!word.isEmpty() && !word.at(0).isLetter() && cursor.anchor() < cursor.position()) {
int cursorPos = cursor.position();
cursor.setPosition(cursor.anchor() + 1, QTextCursor::MoveAnchor);
cursor.setPosition(cursorPos, QTextCursor::KeepAnchor);
word = cursor.selectedText();
}
if(!word.isEmpty() && !spellChecker->spell(word)) {
QTextCursor tmpCursor(cursor);
tmpCursor.setPosition(cursor.anchor());
ui->textEdit->setTextCursor(tmpCursor);
ui->textEdit->ensureCursorVisible();
// highlight the unknown word
QTextEdit::ExtraSelection es;
es.cursor = cursor;
es.format = highlightFormat;
QList<QTextEdit::ExtraSelection> esList;
esList << es;
ui->textEdit->setExtraSelections(esList);
QCoreApplication::processEvents();
// ask the user what to do
SpellCheckDialog::SpellCheckAction spellResult = checkDialog->checkWord(word);
// reset the word highlight
esList.clear();
ui->textEdit->setExtraSelections(esList);
QCoreApplication::processEvents();
if(spellResult == SpellCheckDialog::AbortCheck)
break;
switch(spellResult) {
case SpellCheckDialog::ReplaceOnce:
cursor.insertText(checkDialog->replacement());
break;
default:
break;
}
QCoreApplication::processEvents();
}
cursor.movePosition(QTextCursor::NextWord, QTextCursor::MoveAnchor, 1);
}
//cursor.endEditBlock();
ui->textEdit->setTextCursor(oldCursor);
QMessageBox::information(
this,
tr("Finished"),
tr("The spell check has finished."));
}
spellcheckdialog.ui
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>SpellCheckDialog</class>
<widget class="QDialog" name="SpellCheckDialog">
<property name="geometry">
<rect>
<x>600</x>
<y>326</y>
<width>701</width>
<height>509</height>
</rect>
</property>
<property name="windowTitle">
<string>Spell Checker</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>unknown word:</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QLabel" name="lblUnknownWord">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>TextLabel</string>
</property>
</widget>
</item>
<item row="0" column="2" colspan="2">
<widget class="QPushButton" name="btnAddToDict">
<property name="text">
<string>&lt;&lt; add to dictionary</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>replace with:</string>
</property>
</widget>
</item>
<item row="1" column="1" colspan="2">
<widget class="QLineEdit" name="ledtReplaceWith"/>
</item>
<item row="1" column="3" rowspan="2">
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QPushButton" name="btnReplaceOnce">
<property name="text">
<string>replace once</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="btnReplaceAll">
<property name="text">
<string>replace all</string>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer_2">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Fixed</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>13</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="btnIgnoreOnce">
<property name="text">
<string>ignore once</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="btnIgnoreAll">
<property name="text">
<string>ignore all</string>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>240</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="btnCancel">
<property name="text">
<string>cancel</string>
</property>
</widget>
</item>
</layout>
</item>
<item row="2" column="0" colspan="3">
<widget class="QListWidget" name="listWidget">
<property name="editTriggers">
<set>QAbstractItemView::NoEditTriggers</set>
</property>
<property name="showDropIndicator" stdset="0">
<bool>false</bool>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>
<connections/>
</ui>
spellcheckdialog.h
#ifndef SPELLCHECKDIALOG_H
#define SPELLCHECKDIALOG_H
#include <QDialog>
class SpellChecker;
namespace Ui {
class SpellCheckDialog;
}
// Modal dialog that presents one unknown word together with the spell
// checker's suggestions and reports what the user decided to do with it.
class SpellCheckDialog : public QDialog
{
    Q_OBJECT

public:
    // Outcome of checkWord().
    enum SpellCheckAction {
        AbortCheck,   // default result, kept when no action button was used
        IgnoreOnce,   // set by ignoreOnce()
        IgnoreAll,    // set by ignoreAll()
        ReplaceOnce,  // set by replaceOnce()
        ReplaceAll,   // set by replaceAll()
        AddToDict     // set by addToDict()
    };

    // The dialog keeps the given pointer and uses it for suggestions and
    // for the "ignore all" / "add to dictionary" actions.
    explicit SpellCheckDialog(SpellChecker *spellChecker, QWidget *parent = 0);
    ~SpellCheckDialog();

    // Current content of the "replace with" line edit.
    QString replacement() const;

public slots:
    // Shows the dialog for the given word and returns the chosen action.
    SpellCheckAction checkWord(const QString &word);

protected slots:
    // Button handlers: each stores its action code and accepts the dialog.
    void ignoreOnce();
    void ignoreAll();
    void replaceOnce();
    void replaceAll();
    void addToDict();

private:
    Ui::SpellCheckDialog *ui;       // generated UI, deleted in the destructor
    SpellChecker *_spellChecker;    // pointer passed to the constructor
    QString _unkownWord;            // word handed to the last checkWord() call
    SpellCheckAction _returnCode;   // result reported by checkWord()
};
#endif // SPELLCHECKDIALOG_H
spellcheckdialog.cpp
#include "spellcheckdialog.h"
#include "ui_spellcheckdialog.h"
#include "spellchecker.h"
// Builds the UI, stores the spell checker pointer and connects the
// widgets of the form to the dialog's slots.
SpellCheckDialog::SpellCheckDialog(SpellChecker *spellChecker, QWidget *parent)
    : QDialog(parent)
    , ui(new Ui::SpellCheckDialog)
    , _spellChecker(spellChecker)
{
    ui->setupUi(this);

    // Selecting a suggestion copies it into the "replace with" line edit.
    connect(ui->listWidget, SIGNAL(currentTextChanged(QString)),
            ui->ledtReplaceWith, SLOT(setText(QString)));

    // One slot per action button; cancel simply rejects the dialog.
    connect(ui->btnAddToDict, SIGNAL(clicked()),
            this, SLOT(addToDict()));
    connect(ui->btnReplaceOnce, SIGNAL(clicked()),
            this, SLOT(replaceOnce()));
    connect(ui->btnReplaceAll, SIGNAL(clicked()),
            this, SLOT(replaceAll()));
    connect(ui->btnIgnoreOnce, SIGNAL(clicked()),
            this, SLOT(ignoreOnce()));
    connect(ui->btnIgnoreAll, SIGNAL(clicked()),
            this, SLOT(ignoreAll()));
    connect(ui->btnCancel, SIGNAL(clicked()),
            this, SLOT(reject()));
}
// Frees the generated UI object. The spell checker pointer is not deleted
// here.
SpellCheckDialog::~SpellCheckDialog()
{
    delete ui;
}
// Presents the given unknown word to the user and blocks until the dialog
// is closed.
//
// The word is shown in bold, the line edit is cleared and the list is
// filled with the spell checker's suggestions (the first one pre-selected).
// Returns the action stored by the button slot that closed the dialog, or
// AbortCheck if none of them ran.
SpellCheckDialog::SpellCheckAction SpellCheckDialog::checkWord(const QString &word)
{
    _unkownWord = word;

    const QString boldWord = QString("<b>%1</b>").arg(_unkownWord);
    ui->lblUnknownWord->setText(boldWord);
    ui->ledtReplaceWith->clear();

    const QStringList proposals = _spellChecker->suggest(word);
    ui->listWidget->clear();
    ui->listWidget->addItems(proposals);
    if (!proposals.isEmpty()) {
        ui->listWidget->setCurrentRow(0, QItemSelectionModel::Select);
    }

    // Default result; overwritten by whichever action slot gets triggered.
    _returnCode = AbortCheck;
    QDialog::exec();
    return _returnCode;
}
// Returns the text currently entered in the "replace with" line edit.
QString SpellCheckDialog::replacement() const
{
    const QString replaceWith = ui->ledtReplaceWith->text();
    return replaceWith;
}
void SpellCheckDialog::ignoreOnce()
{
_returnCode = IgnoreOnce;
accept();
}
void SpellCheckDialog::ignoreAll()
{
_spellChecker->ignoreWord(_unkownWord);
_returnCode = IgnoreAll;
accept();
}
void SpellCheckDialog::replaceOnce()
{
_returnCode = ReplaceOnce;
accept();
}
void SpellCheckDialog::replaceAll()
{
_returnCode = ReplaceAll;
accept();
}
void SpellCheckDialog::addToDict()
{
_spellChecker->addToUserWordlist(_unkownWord);
_returnCode = AddToDict;
accept();
}
spellchecker.h
#ifndef SPELLCHECKER_H
#define SPELLCHECKER_H
#include <QString>
class Hunspell;
class SpellChecker
{
public:
SpellChecker(const QString &dictionaryPath, const QString &userDictionary);
~SpellChecker();
bool spell(const QString &word);
QStringList suggest(const QString &word);
void ignoreWord(const QString &word);
void addToUserWordlist(const QString &word);
private:
void put_word(const QString &word);
Hunspell *_hunspell;
QString _userDictionary;
QString _encoding;
QTextCodec *_codec;
};
#endif // SPELLCHECKER_H
spellchecker.cpp
#include "spellchecker.h"
#include <QFile>
#include <QTextStream>
#include <QTextCodec>
#include <QStringList>
#include <QDebug>
#include "hunspell/hunspell.hxx"
// Loads the dictionary "<dictionaryPath>.dic" / "<dictionaryPath>.aff" into
// a new Hunspell instance, determines the dictionary's character encoding
// and feeds the words of the optional user dictionary to Hunspell.
//
// dictionaryPath: base name of the dictionary without the suffixes.
// userDictionary: path of a text file with one word per line; may be empty.
SpellChecker::SpellChecker(const QString &dictionaryPath, const QString &userDictionary)
{
    _userDictionary = userDictionary;

    QString dictFile = dictionaryPath + ".dic";
    QString affixFile = dictionaryPath + ".aff";
    QByteArray dictFilePathBA = dictFile.toLocal8Bit();
    QByteArray affixFilePathBA = affixFile.toLocal8Bit();
    _hunspell = new Hunspell(affixFilePathBA.constData(), dictFilePathBA.constData());

    // detect encoding analyzing the SET option in the affix file
    _encoding = "ISO8859-1";
    QFile _affixFile(affixFile);
    if (_affixFile.open(QIODevice::ReadOnly)) {
        QTextStream stream(&_affixFile);
        QRegExp enc_detector("^\\s*SET\\s+([A-Z0-9\\-]+)\\s*", Qt::CaseInsensitive);
        // Read up to the end of the file. Stopping at the first empty line
        // would miss a SET option that follows a blank line.
        while (!stream.atEnd()) {
            const QString line = stream.readLine();
            if (enc_detector.indexIn(line) > -1) {
                _encoding = enc_detector.cap(1);
                qDebug() << QString("Encoding set to ") + _encoding;
                break;
            }
        }
        _affixFile.close();
    }

    _codec = QTextCodec::codecForName(this->_encoding.toLatin1().constData());
    if (!_codec) {
        // codecForName() returns 0 for names Qt does not know. Every later
        // call dereferences _codec, so fall back to the default encoding
        // instead of crashing.
        qWarning() << "Unknown dictionary encoding" << _encoding << "- falling back to ISO8859-1";
        _encoding = "ISO8859-1";
        _codec = QTextCodec::codecForName("ISO8859-1");
        if (!_codec)
            _codec = QTextCodec::codecForLocale();
    }

    if(!_userDictionary.isEmpty()) {
        QFile userDictonaryFile(_userDictionary);
        if(userDictonaryFile.open(QIODevice::ReadOnly)) {
            QTextStream stream(&userDictonaryFile);
            // Skip empty lines instead of treating the first one as the end
            // of the word list.
            while (!stream.atEnd()) {
                const QString word = stream.readLine();
                if (!word.isEmpty())
                    put_word(word);
            }
            userDictonaryFile.close();
        } else {
            qWarning() << "User dictionary in " << _userDictionary << "could not be opened";
        }
    } else {
        qDebug() << "User dictionary not set.";
    }
}
// Destroys the Hunspell instance created in the constructor.
SpellChecker::~SpellChecker()
{
    delete _hunspell;
}
bool SpellChecker::spell(const QString &word)
{
// Encode from Unicode to the encoding used by current dictionary
return _hunspell->spell(_codec->fromUnicode(word).constData()) != 0;
}
// Asks Hunspell for replacement suggestions for the given word.
//
// Returns the suggestions decoded to Unicode, in the order Hunspell
// delivered them; the list is empty when there are none.
QStringList SpellChecker::suggest(const QString &word)
{
    char **suggestWordList = 0;

    // Encode from Unicode to the encoding used by current dictionary
    int numSuggestions = _hunspell->suggest(&suggestWordList, _codec->fromUnicode(word).constData());

    QStringList suggestions;
    for(int i=0; i < numSuggestions; ++i)
        suggestions << _codec->toUnicode(suggestWordList[i]);

    // free_list() releases every string and the array that holds them.
    // Freeing only the individual strings leaked the array on each call.
    _hunspell->free_list(&suggestWordList, numSuggestions);

    return suggestions;
}
void SpellChecker::ignoreWord(const QString &word)
{
put_word(word);
}
void SpellChecker::put_word(const QString &word)
{
_hunspell->add(_codec->fromUnicode(word).constData());
}
void SpellChecker::addToUserWordlist(const QString &word)
{
put_word(word);
if(!_userDictionary.isEmpty()) {
QFile userDictonaryFile(_userDictionary);
if(userDictonaryFile.open(QIODevice::Append)) {
QTextStream stream(&userDictonaryFile);
stream << word << "\n";
userDictonaryFile.close();
} else {
qWarning() << "User dictionary in " << _userDictionary << "could not be opened for appending a new word";
}
} else {
qDebug() << "User dictionary not set.";
}
}
BSD LICENSE 2011, Volker Götz
License removed from the source to make the code more readable. Added at the bottom.