The spelling dictionaries you are interested in appear to be stored in the following directory (checked on OS X 10.8.4 and 10.6.8):
/System/Library/Services/AppleSpell.service/Contents/Resources/
The word lists are stored in this directory by language, so U.S. English is in the English.lproj
folder.
However, these files are stored in a binary format that I haven't deciphered yet...
It is indeed possible to extract the dictionaries:
Someone else has already investigated the issue and published the results on their blog.
In essence:
Copy the code below and paste it in a *.c document named dedict.c:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "zlib.h"
#define CHUNK 16384
/*
40 Length of the zlib stream
4c 0020
54 0275 number of blocks
60 808c pointer to the next block
64 8088 length of the first block
68 047a4a length of the unpacked block
6c start of the zlib stream
80fc second block
13cd134
13cd174
*/
/*
 * Inflate one zlib stream that is held entirely in memory and write the
 * decompressed bytes to stdout. After the data, one extra line containing
 * the total number of decompressed bytes as six-digit hex ("%06x\n") is
 * also written to stdout.
 *
 * in   start of the zlib stream
 * len  number of bytes available at `in`
 *
 * Returns Z_OK when the stream was inflated up to its end marker,
 * Z_DATA_ERROR for a corrupt or incomplete stream (or a NULL/negative
 * input), Z_MEM_ERROR when zlib runs out of memory, Z_ERRNO when writing
 * to stdout fails, or the error code of inflateInit().
 */
int unpack(unsigned char *in, int len)
{
    int ret;
    unsigned outed = 0; /* unsigned so that it matches the %x conversion below */
    unsigned have;
    z_stream strm;
    unsigned char out[CHUNK];

    /* A negative length would wrap to a huge unsigned avail_in and make
       inflate() read far past the end of the caller's buffer. */
    if (in == NULL || len < 0) {
        return Z_DATA_ERROR;
    }

    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.avail_in = 0;
    strm.next_in = Z_NULL;
    ret = inflateInit(&strm);
    if (ret != Z_OK) {
        return ret;
    }

    /* The whole input is handed over at once; only the output is chunked. */
    strm.avail_in = (unsigned)len;
    strm.next_in = in;
    do {
        strm.avail_out = CHUNK;
        strm.next_out = out;
        ret = inflate(&strm, Z_NO_FLUSH);
        assert(ret != Z_STREAM_ERROR); /* state not clobbered */
        switch (ret) {
        case Z_NEED_DICT:
            ret = Z_DATA_ERROR;
            /* fall through */
        case Z_DATA_ERROR:
        case Z_MEM_ERROR:
            (void)inflateEnd(&strm);
            return ret;
        default:
            break;
        }

        have = CHUNK - strm.avail_out;
        if (have <= 3) {
            fprintf(stderr, "could not find entry\n");
        }
        /* Count bytes rather than writing one item of size `have`:
           fwrite() returns 0 for a zero-sized item, which used to turn an
           empty final chunk into a bogus Z_ERRNO. */
        if ((have > 0 && fwrite(out, 1, have, stdout) != have) || ferror(stdout)) {
            (void)inflateEnd(&strm);
            return Z_ERRNO;
        }
        outed += have;
        /* A completely filled output buffer means there may be more data. */
    } while (strm.avail_out == 0);

    printf("%06x\n", outed);
    (void)inflateEnd(&strm);
    return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
}
/* Path of the Body.data file being read; filled in by main() from argv[1]. */
char filename[256];

/*
 * Dump every compressed block of an installed dictionary to stdout.
 *
 * argv[1] is the dictionary name; the file read is
 * /Library/Dictionaries/<name>.dictionary/Contents/Body.data.
 *
 * The 4-byte value at offset 0x40 gives the extent of the data
 * (limit = 0x40 + value). Starting at offset 0x60, each block is a 4-byte
 * length followed by that many bytes; the first 8 of those bytes are
 * skipped and the remainder is handed to unpack(). The 4-byte fields are
 * read straight into an int, i.e. in host byte order.
 * NOTE(review): this assumes sizeof(int) == 4 -- confirm for new targets.
 *
 * Returns EXIT_SUCCESS when every block was processed, EXIT_FAILURE on a
 * usage error, an I/O error, a malformed block or a failed unpack().
 */
int main(int argc, char **argv)
{
    FILE *fin;
    unsigned char *buf = NULL;
    int l = 0, blen = 0, bcnt = 0;
    long p, limit; /* file offsets: long avoids int overflow in p += 4 + l */
    int status = EXIT_FAILURE;
    int n, rc;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <dictionary name>\n",
                argc > 0 ? argv[0] : "dedict");
        return EXIT_FAILURE;
    }

    /* Bounded formatting: argv[1] is user input and filename is fixed-size. */
    n = snprintf(filename, sizeof filename,
                 "/Library/Dictionaries/%s.dictionary/Contents/Body.data",
                 argv[1]);
    if (n < 0 || (size_t)n >= sizeof filename) {
        fprintf(stderr, "dictionary name too long: %s\n", argv[1]);
        return EXIT_FAILURE;
    }

    fin = fopen(filename, "rb");
    if (fin == NULL) {
        perror(filename);
        return EXIT_FAILURE;
    }

    if (fseek(fin, 0x40, SEEK_SET) != 0 || fread(&l, 1, 4, fin) != 4) {
        fprintf(stderr, "%s: cannot read data length at offset 0x40\n", filename);
        goto done;
    }
    limit = 0x40L + l;
    p = 0x60;
    status = EXIT_SUCCESS;

    do {
        if (fseek(fin, p, SEEK_SET) != 0 || fread(&l, 1, 4, fin) != 4) {
            fprintf(stderr, "%s: cannot read length of block %d at offset 0x%lx\n",
                    filename, bcnt, p);
            status = EXIT_FAILURE;
            break;
        }
        /* 8 bytes are skipped in front of the compressed data, so anything
           shorter cannot be a block; a non-positive length would also stall
           or reverse the walk through the file. */
        if (l < 8) {
            fprintf(stderr, "%s: block %d at offset 0x%lx has invalid length %d\n",
                    filename, bcnt, p, l);
            status = EXIT_FAILURE;
            break;
        }
        /* Grow the buffer only when this block is larger than any before. */
        if (blen < l) {
            free(buf);
            buf = malloc((size_t)l);
            if (buf == NULL) {
                blen = 0;
                perror("malloc");
                status = EXIT_FAILURE;
                break;
            }
            blen = l;
        }
        if (fread(buf, 1, (size_t)l, fin) != (size_t)l) {
            fprintf(stderr, "%s: block %d at offset 0x%lx is truncated\n",
                    filename, bcnt, p);
            status = EXIT_FAILURE;
            break;
        }
        /* unpack() returns Z_OK (0) on success. A bad block is reported but
           the remaining blocks are still processed. */
        rc = unpack(buf + 8, l - 8);
        if (rc != 0) {
            fprintf(stderr, "%s: block %d at offset 0x%lx failed to unpack (%d)\n",
                    filename, bcnt, p, rc);
            status = EXIT_FAILURE;
        }
        p += 4L + l;
        ++bcnt;
    } while (p < limit);

    if (fflush(stdout) != 0) {
        perror("stdout");
        status = EXIT_FAILURE;
    }

done:
    free(buf);
    fclose(fin);
    return status;
}
Then copy this code and paste it into a *.c document named strip.c:
// This program strips the first 4 characters from each line in the input
#include <stdio.h>
/*
 * Copy stdin to stdout, dropping the first four bytes of every line.
 *
 * A line is everything up to and including '\n', or up to end of input for
 * an unterminated last line. Lines of four bytes or fewer (newline
 * included) produce no output at all.
 *
 * Uses only ISO C stdio instead of the BSD-specific fgetln().
 * Returns 0 on success and 1 if reading stdin or writing stdout fails.
 */
int main(void)
{
    int skipped = 0; /* bytes of the current line dropped so far (0..4) */
    int c;

    while ((c = getchar()) != EOF) {
        if (skipped < 4) {
            ++skipped;
        } else if (putchar(c) == EOF) {
            return 1;
        }
        /* The newline itself counts as a byte of the line, so a line with
           exactly four bytes before it still emits the newline. */
        if (c == '\n') {
            skipped = 0;
        }
    }

    if (ferror(stdin) || fflush(stdout) != 0) {
        return 1;
    }
    return 0;
}
Then open Terminal, cd to the folder containing the two *.c files, and run the commands below. The example uses one of the dictionaries in /Library/Dictionaries/; to use a different dictionary from the same folder, replace the name "Oxford Dictionary of English" in line 3 below:
clang dedict.c -Wall -lz -o dedict
clang strip.c -Wall -o strip
./dedict "Oxford Dictionary of English" | ./strip > dict.xml
egrep -o 'd:title="(.+?)"' dict.xml | awk -F\" '{print $2}' > words
The first four results in the words file (the entries get more interesting further down):
A
A
a
a
This should work at least in OS X 10.7 - 10.10.
Best Answer
Dictionaries used to be in /System/Library/Dictionaries, but they were moved (c. Sierra?) to various subfolders in /System/Library/Assets/com_apple_MobileAsset_DictionaryServices_dictionaryOSX. The format is a package, and the data files inside seem to be in some non-human-readable binary data format, though they start off life as XML files.
Assuming you've searched for projects that can import Apple's dictionaries, rather than just exporting them, and found nothing, the task may be 'non-trivial', and you may be better off using the very good open-source DICT format files.