%!
%%BoundingBox: (atend)
%%Pages: (atend)
%%DocumentFonts: (atend)
%%EndComments
%
% FrameMaker PostScript Prolog 3.0, for use with FrameMaker 3.0
% Copyright (c) 1986,87,89,90,91 by Frame Technology Corporation.
% All rights reserved.
%
% Known Problems:
% Due to bugs in Transcript, the 'PS-Adobe-' is omitted from line 1
/FMversion (3.0) def
% Set up Color vs. Black-and-White
/FMPrintInColor systemdict /colorimage known
systemdict /currentcolortransfer known or def
% Uncomment this line to force b&w on color printer
% /FMPrintInColor false def
/FrameDict 195 dict def
systemdict /errordict known not {/errordict 10 dict def
errordict /rangecheck {stop} put} if
% The readline in 23.0 doesn't recognize cr's as nl's on AppleTalk
FrameDict /tmprangecheck errordict /rangecheck get put
errordict /rangecheck {FrameDict /bug true put} put
FrameDict /bug false put
mark
% Some PS machines read past the CR, so keep the following 3 lines together!
currentfile 5 string readline
00
0000000000
cleartomark
errordict /rangecheck FrameDict /tmprangecheck get put
FrameDict /bug get {
/readline {
/gstring exch def
/gfile exch def
/gindex 0 def
{
gfile read pop
dup 10 eq {exit} if
dup 13 eq {exit} if
gstring exch gindex exch put
/gindex gindex 1 add def
} loop
pop
gstring 0 gindex getinterval true
} def
} if
/FMVERSION {
FMversion ne {
/Times-Roman findfont 18 scalefont setfont
100 100 moveto
(FrameMaker version does not match postscript_prolog!)
dup =
show showpage
} if
} def
/FMLOCAL {
FrameDict begin
0 def
end
} def
/gstring FMLOCAL
/gfile FMLOCAL
/gindex FMLOCAL
/orgxfer FMLOCAL
/orgproc FMLOCAL
/organgle FMLOCAL
/orgfreq FMLOCAL
/yscale FMLOCAL
/xscale FMLOCAL
/manualfeed FMLOCAL
/paperheight FMLOCAL
/paperwidth FMLOCAL
/FMDOCUMENT {
array /FMfonts exch def
/#copies exch def
FrameDict begin
0 ne dup {setmanualfeed} if
/manualfeed exch def
/paperheight exch def
/paperwidth exch def
/yscale exch def
/xscale exch def
currenttransfer cvlit /orgxfer exch def
currentscreen cvlit /orgproc exch def
/organgle exch def /orgfreq exch def
setpapername
manualfeed {true} {papersize} ifelse
{manualpapersize} {false} ifelse
{desperatepapersize} if
end
} def
/pagesave FMLOCAL
/orgmatrix FMLOCAL
/landscape FMLOCAL
/FMBEGINPAGE {
FrameDict begin
/pagesave save def
3.86 setmiterlimit
/landscape exch 0 ne def
landscape {
90 rotate 0 exch neg translate pop
}
{pop pop}
ifelse
xscale yscale scale
/orgmatrix matrix def
gsave
} def
/FMENDPAGE {
grestore
pagesave restore
end
showpage
} def
/FMFONTDEFINE {
FrameDict begin
findfont
ReEncode
1 index exch
definefont
FMfonts 3 1 roll
put
end
} def
/FMFILLS {
FrameDict begin
array /fillvals exch def
end
} def
/FMFILL {
FrameDict begin
fillvals 3 1 roll put
end
} def
/FMNORMALIZEGRAPHICS {
newpath
0.0 0.0 moveto
1 setlinewidth
0 setlinecap
0 0 0 sethsbcolor
0 setgray
} bind def
/fx FMLOCAL
/fy FMLOCAL
/fh FMLOCAL
/fw FMLOCAL
/llx FMLOCAL
/lly FMLOCAL
/urx FMLOCAL
/ury FMLOCAL
/FMBEGINEPSF {
end
/FMEPSF save def
/showpage {} def
FMNORMALIZEGRAPHICS
[/fy /fx /fh /fw /ury /urx /lly /llx] {exch def} forall
fx fy translate
rotate
fw urx llx sub div fh ury lly sub div scale
llx neg lly neg translate
} bind def
/FMENDEPSF {
FMEPSF restore
FrameDict begin
} bind def
FrameDict begin
/setmanualfeed {
%%BeginFeature *ManualFeed True
statusdict /manualfeed true put
%%EndFeature
} def
/max {2 copy lt {exch} if pop} bind def
/min {2 copy gt {exch} if pop} bind def
/inch {72 mul} def
/pagedimen {
paperheight sub abs 16 lt exch
paperwidth sub abs 16 lt and
{/papername exch def} {pop} ifelse
} def
/papersizedict FMLOCAL
/setpapername {
/papersizedict 14 dict def
papersizedict begin
/papername /unknown def
/Letter 8.5 inch 11.0 inch pagedimen
/LetterSmall 7.68 inch 10.16 inch pagedimen
/Tabloid 11.0 inch 17.0 inch pagedimen
/Ledger 17.0 inch 11.0 inch pagedimen
/Legal 8.5 inch 14.0 inch pagedimen
/Statement 5.5 inch 8.5 inch pagedimen
/Executive 7.5 inch 10.0 inch pagedimen
/A3 11.69 inch 16.5 inch pagedimen
/A4 8.26 inch 11.69 inch pagedimen
/A4Small 7.47 inch 10.85 inch pagedimen
/B4 10.125 inch 14.33 inch pagedimen
/B5 7.16 inch 10.125 inch pagedimen
end
} def
/papersize {
papersizedict begin
/Letter {lettertray letter} def
/LetterSmall {lettertray lettersmall} def
/Tabloid {11x17tray 11x17} def
/Ledger {ledgertray ledger} def
/Legal {legaltray legal} def
/Statement {statementtray statement} def
/Executive {executivetray executive} def
/A3 {a3tray a3} def
/A4 {a4tray a4} def
/A4Small {a4tray a4small} def
/B4 {b4tray b4} def
/B5 {b5tray b5} def
/unknown {unknown} def
papersizedict dup papername known {papername} {/unknown} ifelse get
end
/FMdicttop countdictstack 1 add def
statusdict begin stopped end
countdictstack -1 FMdicttop {pop end} for
} def
/manualpapersize {
papersizedict begin
/Letter {letter} def
/LetterSmall {lettersmall} def
/Tabloid {11x17} def
/Ledger {ledger} def
/Legal {legal} def
/Statement {statement} def
/Executive {executive} def
/A3 {a3} def
/A4 {a4} def
/A4Small {a4small} def
/B4 {b4} def
/B5 {b5} def
/unknown {unknown} def
papersizedict dup papername known {papername} {/unknown} ifelse get
end
stopped
} def
/desperatepapersize {
statusdict /setpageparams known
{
paperwidth paperheight 0 1
statusdict begin
{setpageparams} stopped pop
end
} if
} def
/savematrix {
orgmatrix currentmatrix pop
} bind def
/restorematrix {
orgmatrix setmatrix
} bind def
/dmatrix matrix def
/dpi 72 0 dmatrix defaultmatrix dtransform
dup mul exch dup mul add sqrt def
/freq dpi 18.75 div 8 div round dup 0 eq {pop 1} if 8 mul dpi exch div def
/sangle 1 0 dmatrix defaultmatrix dtransform exch atan def
/DiacriticEncoding [
/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef
/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef
/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef
/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef
/.notdef /.notdef /.notdef /.notdef /space /exclam /quotedbl
/numbersign /dollar /percent /ampersand /quotesingle /parenleft
/parenright /asterisk /plus /comma /hyphen /period /slash /zero /one
/two /three /four /five /six /seven /eight /nine /colon /semicolon
/less /equal /greater /question /at /A /B /C /D /E /F /G /H /I /J /K
/L /M /N /O /P /Q /R /S /T /U /V /W /X /Y /Z /bracketleft /backslash
/bracketright /asciicircum /underscore /grave /a /b /c /d /e /f /g /h
/i /j /k /l /m /n /o /p /q /r /s /t /u /v /w /x /y /z /braceleft /bar
/braceright /asciitilde /.notdef /Adieresis /Aring /Ccedilla /Eacute
/Ntilde /Odieresis /Udieresis /aacute /agrave /acircumflex /adieresis
/atilde /aring /ccedilla /eacute /egrave /ecircumflex /edieresis
/iacute /igrave /icircumflex /idieresis /ntilde /oacute /ograve
/ocircumflex /odieresis /otilde /uacute /ugrave /ucircumflex
/udieresis /dagger /.notdef /cent /sterling /section /bullet
/paragraph /germandbls /registered /copyright /trademark /acute
/dieresis /.notdef /AE /Oslash /.notdef /.notdef /.notdef /.notdef
/yen /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef
/ordfeminine /ordmasculine /.notdef /ae /oslash /questiondown
/exclamdown /logicalnot /.notdef /florin /.notdef /.notdef
/guillemotleft /guillemotright /ellipsis /.notdef /Agrave /Atilde
/Otilde /OE /oe /endash /emdash /quotedblleft /quotedblright
/quoteleft /quoteright /.notdef /.notdef /ydieresis /Ydieresis
/fraction /currency /guilsinglleft /guilsinglright /fi /fl /daggerdbl
/periodcentered /quotesinglbase /quotedblbase /perthousand
/Acircumflex /Ecircumflex /Aacute /Edieresis /Egrave /Iacute
/Icircumflex /Idieresis /Igrave /Oacute /Ocircumflex /.notdef /Ograve
/Uacute /Ucircumflex /Ugrave /dotlessi /circumflex /tilde /macron
/breve /dotaccent /ring /cedilla /hungarumlaut /ogonek /caron
] def
/ReEncode {
dup
length
dict begin
{
1 index /FID ne
{def}
{pop pop} ifelse
} forall
0 eq {/Encoding DiacriticEncoding def} if
currentdict
end
} bind def
/graymode true def
/bwidth FMLOCAL
/bpside FMLOCAL
/bstring FMLOCAL
/onbits FMLOCAL
/offbits FMLOCAL
/xindex FMLOCAL
/yindex FMLOCAL
/x FMLOCAL
/y FMLOCAL
/setpattern {
/bwidth exch def
/bpside exch def
/bstring exch def
/onbits 0 def /offbits 0 def
freq sangle landscape {90 add} if
{/y exch def
/x exch def
/xindex x 1 add 2 div bpside mul cvi def
/yindex y 1 add 2 div bpside mul cvi def
bstring yindex bwidth mul xindex 8 idiv add get
1 7 xindex 8 mod sub bitshift and 0 ne
{/onbits onbits 1 add def 1}
{/offbits offbits 1 add def 0}
ifelse
}
setscreen
{} settransfer
offbits offbits onbits add div FMsetgray
/graymode false def
} bind def
/grayness {
FMsetgray
graymode not {
/graymode true def
orgxfer cvx settransfer
orgfreq organgle orgproc cvx setscreen
} if
} bind def
/HUE FMLOCAL
/SAT FMLOCAL
/BRIGHT FMLOCAL
/Colors FMLOCAL
FMPrintInColor
{
/HUE 0 def
/SAT 0 def
/BRIGHT 0 def
% array of arrays Hue and Sat values for the separations [HUE BRIGHT]
/Colors
[[0 0 ] % black
[0 0 ] % white
[0.00 1.0] % red
[0.37 1.0] % green
[0.60 1.0] % blue
[0.50 1.0] % cyan
[0.83 1.0] % magenta
[0.16 1.0] % comment / yellow
] def
/BEGINBITMAPCOLOR {
BITMAPCOLOR} def
/BEGINBITMAPCOLORc {
BITMAPCOLORc} def
/BEGINBITMAPTRUECOLOR {
BITMAPTRUECOLOR } def
/BEGINBITMAPTRUECOLORc {
BITMAPTRUECOLORc } def
/K {
Colors exch get dup
0 get /HUE exch store
1 get /BRIGHT exch store
HUE 0 eq BRIGHT 0 eq and
{1.0 SAT sub setgray}
{HUE SAT BRIGHT sethsbcolor}
ifelse
} def
/FMsetgray {
/SAT exch 1.0 exch sub store
HUE 0 eq BRIGHT 0 eq and
{1.0 SAT sub setgray}
{HUE SAT BRIGHT sethsbcolor}
ifelse
} bind def
}
{
/BEGINBITMAPCOLOR {
BITMAPGRAY} def
/BEGINBITMAPCOLORc {
BITMAPGRAYc} def
/BEGINBITMAPTRUECOLOR {
BITMAPTRUEGRAY } def
/BEGINBITMAPTRUECOLORc {
BITMAPTRUEGRAYc } def
/FMsetgray {setgray} bind def
/K {
pop
} def
}
ifelse
/normalize {
transform round exch round exch itransform
} bind def
/dnormalize {
dtransform round exch round exch idtransform
} bind def
/lnormalize {
0 dtransform exch cvi 2 idiv 2 mul 1 add exch idtransform pop
} bind def
/H {
lnormalize setlinewidth
} bind def
/Z {
setlinecap
} bind def
/fillvals FMLOCAL
/X {
fillvals exch get
dup type /stringtype eq
{8 1 setpattern}
{grayness}
ifelse
} bind def
/V {
gsave eofill grestore
} bind def
/N {
stroke
} bind def
/M {newpath moveto} bind def
/E {lineto} bind def
/D {curveto} bind def
/O {closepath} bind def
/n FMLOCAL
/L {
/n exch def
newpath
normalize
moveto
2 1 n {pop normalize lineto} for
} bind def
/Y {
L
closepath
} bind def
/x1 FMLOCAL
/x2 FMLOCAL
/y1 FMLOCAL
/y2 FMLOCAL
/rad FMLOCAL
/R {
/y2 exch def
/x2 exch def
/y1 exch def
/x1 exch def
x1 y1
x2 y1
x2 y2
x1 y2
4 Y
} bind def
/RR {
/rad exch def
normalize
/y2 exch def
/x2 exch def
normalize
/y1 exch def
/x1 exch def
newpath
x1 y1 rad add moveto
x1 y2 x2 y2 rad arcto
x2 y2 x2 y1 rad arcto
x2 y1 x1 y1 rad arcto
x1 y1 x1 y2 rad arcto
closepath
16 {pop} repeat
} bind def
/C {
grestore
gsave
R
clip
} bind def
/FMpointsize FMLOCAL
/F {
FMfonts exch get
FMpointsize scalefont
setfont
} bind def
/Q {
/FMpointsize exch def
F
} bind def
/T {
moveto show
} bind def
/RF {
rotate
0 ne {-1 1 scale} if
} bind def
/TF {
gsave
moveto
RF
show
grestore
} bind def
/P {
moveto
0 32 3 2 roll widthshow
} bind def
/PF {
gsave
moveto
RF
0 32 3 2 roll widthshow
grestore
} bind def
/S {
moveto
0 exch ashow
} bind def
/SF {
gsave
moveto
RF
0 exch ashow
grestore
} bind def
/B {
moveto
0 32 4 2 roll 0 exch awidthshow
} bind def
/BF {
gsave
moveto
RF
0 32 4 2 roll 0 exch awidthshow
grestore
} bind def
/G {
gsave
newpath
normalize translate 0.0 0.0 moveto
dnormalize scale
0.0 0.0 1.0 5 3 roll arc
closepath fill
grestore
} bind def
/A {
gsave
savematrix
newpath
2 index 2 div add exch 3 index 2 div sub exch
normalize 2 index 2 div sub exch 3 index 2 div add exch
translate
scale
0.0 0.0 1.0 5 3 roll arc
restorematrix
stroke
grestore
} bind def
/x FMLOCAL
/y FMLOCAL
/w FMLOCAL
/h FMLOCAL
/xx FMLOCAL
/yy FMLOCAL
/ww FMLOCAL
/hh FMLOCAL
/FMsaveobject FMLOCAL
/FMoptop FMLOCAL
/FMdicttop FMLOCAL
/BEGINPRINTCODE {
/FMdicttop countdictstack 1 add def
/FMoptop count 4 sub def
/FMsaveobject save def
userdict begin
/showpage {} def
FMNORMALIZEGRAPHICS
3 index neg 3 index neg translate
} bind def
/ENDPRINTCODE {
count -1 FMoptop {pop pop} for
countdictstack -1 FMdicttop {pop end} for
FMsaveobject restore
} bind def
/gn {
0
{ 46 mul
cf read pop
32 sub
dup 46 lt {exit} if
46 sub add
} loop
add
} bind def
/str FMLOCAL
/cfs {
/str sl string def
0 1 sl 1 sub {str exch val put} for
str def
} bind def
/ic [
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0223
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0223
0
{0 hx} {1 hx} {2 hx} {3 hx} {4 hx} {5 hx} {6 hx} {7 hx} {8 hx} {9 hx}
{10 hx} {11 hx} {12 hx} {13 hx} {14 hx} {15 hx} {16 hx} {17 hx} {18 hx}
{19 hx} {gn hx} {0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10} {11} {12}
{13} {14} {15} {16} {17} {18} {19} {gn} {0 wh} {1 wh} {2 wh} {3 wh}
{4 wh} {5 wh} {6 wh} {7 wh} {8 wh} {9 wh} {10 wh} {11 wh} {12 wh}
{13 wh} {14 wh} {gn wh} {0 bl} {1 bl} {2 bl} {3 bl} {4 bl} {5 bl} {6 bl}
{7 bl} {8 bl} {9 bl} {10 bl} {11 bl} {12 bl} {13 bl} {14 bl} {gn bl}
{0 fl} {1 fl} {2 fl} {3 fl} {4 fl} {5 fl} {6 fl} {7 fl} {8 fl} {9 fl}
{10 fl} {11 fl} {12 fl} {13 fl} {14 fl} {gn fl}
] def
/sl FMLOCAL
/val FMLOCAL
/ws FMLOCAL
/im FMLOCAL
/bs FMLOCAL
/cs FMLOCAL
/len FMLOCAL
/pos FMLOCAL
/ms {
/sl exch def
/val 255 def
/ws cfs
/im cfs
/val 0 def
/bs cfs
/cs cfs
} bind def
400 ms
/ip {
is
0
cf cs readline pop
{ ic exch get exec
add
} forall
pop
} bind def
/wh {
/len exch def
/pos exch def
ws 0 len getinterval im pos len getinterval copy pop
pos len
} bind def
/bl {
/len exch def
/pos exch def
bs 0 len getinterval im pos len getinterval copy pop
pos len
} bind def
/s1 1 string def
/fl {
/len exch def
/pos exch def
/val cf s1 readhexstring pop 0 get def
pos 1 pos len add 1 sub {im exch val put} for
pos len
} bind def
/hx {
3 copy getinterval
cf exch readhexstring pop pop
} bind def
/h FMLOCAL
/w FMLOCAL
/d FMLOCAL
/lb FMLOCAL
/bitmapsave FMLOCAL
/is FMLOCAL
/cf FMLOCAL
/wbytes {
dup
8 eq {pop} {1 eq {7 add 8 idiv} {3 add 4 idiv} ifelse} ifelse
} bind def
/BEGINBITMAPBWc {
1 {} COMMONBITMAPc
} bind def
/BEGINBITMAPGRAYc {
8 {} COMMONBITMAPc
} bind def
/BEGINBITMAP2BITc {
2 {} COMMONBITMAPc
} bind def
/COMMONBITMAPc {
/r exch def
/d exch def
gsave
translate rotate scale /h exch def /w exch def
/lb w d wbytes def
sl lb lt {lb ms} if
/bitmapsave save def
r
/is im 0 lb getinterval def
ws 0 lb getinterval is copy pop
/cf currentfile def
w h d [w 0 0 h neg 0 h]
{ip} image
bitmapsave restore
grestore
} bind def
/BEGINBITMAPBW {
1 {} COMMONBITMAP
} bind def
/BEGINBITMAPGRAY {
8 {} COMMONBITMAP
} bind def
/BEGINBITMAP2BIT {
2 {} COMMONBITMAP
} bind def
/COMMONBITMAP {
/r exch def
/d exch def
gsave
translate rotate scale /h exch def /w exch def
/bitmapsave save def
r
/is w d wbytes string def
/cf currentfile def
w h d [w 0 0 h neg 0 h]
{cf is readhexstring pop} image
bitmapsave restore
grestore
} bind def
/proc1 FMLOCAL
/proc2 FMLOCAL
/newproc FMLOCAL
/Fmcc {
/proc2 exch cvlit def
/proc1 exch cvlit def
/newproc proc1 length proc2 length add array def
newproc 0 proc1 putinterval
newproc proc1 length proc2 putinterval
newproc cvx
} bind def
/ngrayt 256 array def
/nredt 256 array def
/nbluet 256 array def
/ngreent 256 array def
/gryt FMLOCAL
/blut FMLOCAL
/grnt FMLOCAL
/redt FMLOCAL
/indx FMLOCAL
/cynu FMLOCAL
/magu FMLOCAL
/yelu FMLOCAL
/k FMLOCAL
/u FMLOCAL
/colorsetup {
currentcolortransfer
/gryt exch def
/blut exch def
/grnt exch def
/redt exch def
0 1 255 {
/indx exch def
/cynu 1 red indx get 255 div sub def
/magu 1 green indx get 255 div sub def
/yelu 1 blue indx get 255 div sub def
/k cynu magu min yelu min def
/u k currentundercolorremoval exec def
nredt indx 1 0 cynu u sub max sub redt exec put
ngreent indx 1 0 magu u sub max sub grnt exec put
nbluet indx 1 0 yelu u sub max sub blut exec put
ngrayt indx 1 k currentblackgeneration exec sub gryt exec put
} for
{255 mul cvi nredt exch get}
{255 mul cvi ngreent exch get}
{255 mul cvi nbluet exch get}
{255 mul cvi ngrayt exch get}
setcolortransfer
{pop 0} setundercolorremoval
{} setblackgeneration
} bind def
/tran FMLOCAL
/fakecolorsetup {
/tran 256 string def
0 1 255 {/indx exch def
tran indx
red indx get 77 mul
green indx get 151 mul
blue indx get 28 mul
add add 256 idiv put} for
currenttransfer
{255 mul cvi tran exch get 255.0 div}
exch Fmcc settransfer
} bind def
/BITMAPCOLOR {
/d 8 def
gsave
translate rotate scale /h exch def /w exch def
/bitmapsave save def
colorsetup
/is w d wbytes string def
/cf currentfile def
w h d [w 0 0 h neg 0 h]
{cf is readhexstring pop} {is} {is} true 3 colorimage
bitmapsave restore
grestore
} bind def
/BITMAPCOLORc {
/d 8 def
gsave
translate rotate scale /h exch def /w exch def
/lb w d wbytes def
sl lb lt {lb ms} if
/bitmapsave save def
colorsetup
/is im 0 lb getinterval def
ws 0 lb getinterval is copy pop
/cf currentfile def
w h d [w 0 0 h neg 0 h]
{ip} {is} {is} true 3 colorimage
bitmapsave restore
grestore
} bind def
/BITMAPTRUECOLORc {
gsave
translate rotate scale /h exch def /w exch def
/bitmapsave save def
/is w string def
ws 0 w getinterval is copy pop
/cf currentfile def
w h 8 [w 0 0 h neg 0 h]
{ip} {gip} {bip} true 3 colorimage
bitmapsave restore
grestore
} bind def
/BITMAPTRUECOLOR {
gsave
translate rotate scale /h exch def /w exch def
/bitmapsave save def
/is w string def
/gis w string def
/bis w string def
/cf currentfile def
w h 8 [w 0 0 h neg 0 h]
{ cf is readhexstring pop }
{ cf gis readhexstring pop }
{ cf bis readhexstring pop }
true 3 colorimage
bitmapsave restore
grestore
} bind def
/BITMAPTRUEGRAYc {
gsave
translate rotate scale /h exch def /w exch def
/bitmapsave save def
/is w string def
ws 0 w getinterval is copy pop
/cf currentfile def
w h 8 [w 0 0 h neg 0 h]
{ip gip bip w gray} image
bitmapsave restore
grestore
} bind def
/ww FMLOCAL
/r FMLOCAL
/g FMLOCAL
/b FMLOCAL
/i FMLOCAL
/gray {
/ww exch def
/b exch def
/g exch def
/r exch def
0 1 ww 1 sub { /i exch def r i get .299 mul g i get .587 mul
b i get .114 mul add add r i 3 -1 roll floor cvi put } for
r
} bind def
/BITMAPTRUEGRAY {
gsave
translate rotate scale /h exch def /w exch def
/bitmapsave save def
/is w string def
/gis w string def
/bis w string def
/cf currentfile def
w h 8 [w 0 0 h neg 0 h]
{ cf is readhexstring pop
cf gis readhexstring pop
cf bis readhexstring pop w gray} image
bitmapsave restore
grestore
} bind def
/BITMAPGRAY {
8 {fakecolorsetup} COMMONBITMAP
} bind def
/BITMAPGRAYc {
8 {fakecolorsetup} COMMONBITMAPc
} bind def
/ENDBITMAP {
} bind def
end
/ALDsave FMLOCAL
/ALDmatrix matrix def ALDmatrix currentmatrix pop
/StartALD {
/ALDsave save def
savematrix
ALDmatrix setmatrix
} bind def
/InALD {
restorematrix
} bind def
/DoneALD {
ALDsave restore
} bind def
%%EndProlog
%%BeginSetup
(3.0) FMVERSION
1 1 595.3 841.9 0 1 14 FMDOCUMENT
0 0 /Times-Bold FMFONTDEFINE
1 0 /Times-Roman FMFONTDEFINE
2 0 /Times-Italic FMFONTDEFINE
3 0 /Times-BoldItalic FMFONTDEFINE
4 0 /Courier FMFONTDEFINE
5 1 /Symbol FMFONTDEFINE
6 0 /Helvetica FMFONTDEFINE
32 FMFILLS
0 0 FMFILL
1 0.1 FMFILL
2 0.3 FMFILL
3 0.5 FMFILL
4 0.7 FMFILL
5 0.9 FMFILL
6 0.97 FMFILL
7 1 FMFILL
8 <0f1e3c78f0e1c387> FMFILL
9 <0f87c3e1f0783c1e> FMFILL
10 FMFILL
11 FMFILL
12 <8142241818244281> FMFILL
13 <03060c183060c081> FMFILL
14 <8040201008040201> FMFILL
16 1 FMFILL
17 0.9 FMFILL
18 0.7 FMFILL
19 0.5 FMFILL
20 0.3 FMFILL
21 0.1 FMFILL
22 0.03 FMFILL
23 0 FMFILL
24 FMFILL
25 FMFILL
26 <3333333333333333> FMFILL
27 <0000ffff0000ffff> FMFILL
28 <7ebddbe7e7dbbd7e> FMFILL
29 FMFILL
30 <7fbfdfeff7fbfdfe> FMFILL
%%EndSetup
%%Page: "1" 1
%%BeginPaperSize: A4
%%EndPaperSize
595.3 841.9 0 FMBEGINPAGE
99.65 96.95 530.23 743.25 R
7 X
0 K
V
0 14 Q
0 X
(PROBABILISTIC REASONING AS INFORMATION) 150.71 733.92 T
-0.99 (COMPRESSION BY MULTIPLE ALIGNMENT, UNIFICATION AND) 99.65 717.92 P
(SEARCH \050I\051: INTRODUCTION) 216.61 701.92 T
1 F
(J Gerard Wolff) 272.59 670.92 T
1 12 Q
(University of Wales, Bangor, UK) 234.33 646.25 T
(gerry@sees.bangor.ac.uk) 254.3 630.25 T
0 F
4.41 (Abstract:) 99.65 598.25 P
1 F
4.41 ( This article and its two accompanying articles introduce the idea that) 148.27 598.25 P
3.91 (probabilistic reasoning \050PrbRs\051 may be understood as information compression by) 99.65 584.25 P
(multiple alignment, unification and search \050ICMAUS\051.) 99.65 570.25 T
0.73 (In this context,) 135.65 554.25 P
2 F
0.73 (multiple alignment) 212.45 554.25 P
1 F
0.73 ( has a meaning which is similar to but distinct) 303.47 554.25 P
-0.74 (from its meaning in bio-informatics, while) 99.65 540.25 P
2 F
-0.74 (unification) 302.11 540.25 P
1 F
-0.74 ( means a simple merging of matching) 354.09 540.25 P
(patterns, a meaning which is related to but simpler than the meaning of that term in logic.) 99.65 526.25 T
0.11 (This article, the first of the three, describes the concept of multiple alignment as it) 135.65 510.25 P
0.56 (has been developed in this research, illustrated with a simple example. Also described is) 99.65 496.25 P
0.68 (the way in which information compression associated with a multiple alignment may be) 99.65 482.25 P
(calculated.) 99.65 468.25 T
0.79 (A software model, SP61, has been developed for the discovery and formation of) 135.65 452.25 P
5.02 (\324good\325 multiple alignments, evaluated in terms of information compression. The) 99.65 438.25 P
(organisation and operation of this model is described in outline.) 99.65 424.25 T
-0.74 (As an introduction to the two accompanying articles, this article describes in outline) 135.65 408.25 P
(how multiple alignments may provide a basis for PrbRs.) 99.65 394.25 T
0 F
5.21 (Key Words:) 99.65 378.25 P
1 F
5.21 ( Probabilistic reasoning; multiple alignment; unification; information) 167.15 378.25 P
(compression.) 99.65 364.25 T
0 F
5.63 (Category:) 99.65 348.25 P
1 F
5.63 ( Computing Methodologies/Artificial Intelligence/Reasoning \050uncertainty,) 150.93 348.25 P
(\324fuzzy\325 and probabilistic\051. I.2.SD.1.2.3.) 99.65 334.25 T
0 F
(1) 99.65 300.25 T
(Introduction) 135.65 300.25 T
1 F
2.02 (Quoting Benjamin Franklin \050\322Nothing is certain but death and taxes\323\051, Ginsberg [16]) 99.65 276.25 P
-0.44 (writes \050p. 2\051 that: \322The view that Franklin was expressing is that virtually every conclusion) 99.65 262.25 P
0.05 (we draw [in reasoning] is an uncertain one.\323 He goes on to say: \322This sort of reasoning in) 99.65 248.25 P
(the face of uncertainty... has ... proved to be remarkably difficult to formalise.\323) 99.65 234.25 T
2.29 (This article is the first of three articles introducing the idea that) 135.65 218.25 P
2 F
2.29 (probabilistic) 468.92 218.25 P
0.77 (reasoning) 99.65 204.25 P
1 F
0.77 ( \050PrbRs\051 may possibly be understood as a process of) 147.62 204.25 P
2 F
0.77 (information compression) 409.19 204.25 P
1 F
(\050IC\051 by) 99.65 190.25 T
2 F
(multiple alignment) 137.62 190.25 T
1 F
( with) 227.91 190.25 T
2 F
(unification) 255.23 190.25 T
1 F
( and) 307.21 190.25 T
2 F
(search) 330.52 190.25 T
1 F
( \050ICMAUS\051.) 362.5 190.25 T
1.18 (In this context, PrbRs will be used quite loosely to mean any kind of reasoning) 135.65 174.25 P
-0.72 (which attempts to allow for uncertainty of any kind) 99.65 160.25 P
2 F
-0.72 (.) 340.36 160.25 P
1 F
-0.72 (This includes kinds of reasoning which) 345.63 160.25 P
3.96 (use default values for variables where specific values are unknown and kinds of) 99.65 146.25 P
0.5 (\324nonmonotonic\325 reasoning which allow probabilistic conclusions reached on the strength) 99.65 132.25 P
(of one state of knowledge to be overturned when new knowledge is acquired.) 99.65 118.25 T
1.34 (In this context,) 135.65 102.25 P
2 F
1.34 (multiple alignment) 214.3 102.25 P
1 F
1.34 ( \050MA\051 has a meaning which is similar to but) 305.93 102.25 P
FMENDPAGE
%%EndPage: "1" 2
%%Page: "2" 2
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 2 -) 305.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
0.41 (distinct from its meaning in bio-informatics while) 99.65 736.95 P
2 F
0.41 (unification) 344.71 736.95 P
1 F
0.41 ( means a simple merging of) 396.69 736.95 P
-0.16 (matching patterns, a meaning which is related to but simpler than the meaning of that term) 99.65 722.95 P
(in logic.) 99.65 708.95 T
1.08 (The term) 135.65 692.95 P
2 F
1.08 (search) 184.43 692.95 P
1 F
1.08 ( in this context means the systematic exploration of the abstract) 216.41 692.95 P
4.59 (space of possible alignments, normally constrained in some way \050using heuristic) 99.65 678.95 P
(techniques or otherwise\051 to achieve useful results in realistic timescales.) 99.65 664.95 T
0 F
(1.1) 99.65 630.95 T
(Content) 135.65 630.95 T
1 F
1.25 (This article introduces these proposals and the research programme in which they have) 99.65 606.95 P
-0.09 (been developed. The concept of MA is described as it has been developed in this research.) 99.65 592.95 P
-0.64 (The way in which the IC associated with any alignment may be calculated is described, first) 99.65 578.95 P
0.55 (in qualitative terms with reference to an example and then in more precise terms. This is) 99.65 564.95 P
0.84 (followed by a description of SP61, a software model designed to discover and construct) 99.65 550.95 P
-0.4 (MAs which are \324good\325 in terms of IC. Finally, this article describes in outline how multiple) 99.65 536.95 P
(alignments may provide a basis for PrbRs.) 99.65 522.95 T
0.6 (The second article begins by showing how the probabilities of inferences derived) 135.65 506.95 P
0.71 (from any alignment may be calculated. The remainder of this article provides a range of) 99.65 492.95 P
-0.34 (examples showing how probabilistic inferences may be formed using ICMAUS. Examples) 99.65 478.95 P
(include:) 99.65 464.95 T
(1) 99.65 448.95 T
0.36 (PrbRs associated with probabilistic and fuzzy pattern recognition and information) 135.65 448.95 P
0.21 (retrieval. Associated topics include inheritance of attributes in a class hierarchy or) 135.65 434.95 P
(heterarchy and the recognition of \324polythetic\325 categories.) 135.65 420.95 T
(2) 99.65 404.95 T
(One-step \324deductive\325 reasoning.) 135.65 404.95 T
(3) 99.65 388.95 T
5.62 (The integration of \324deductive\325 and \324abductive\325 reasoning within a single) 135.65 388.95 P
(framework.) 135.65 374.95 T
(4) 99.65 358.95 T
(Chains of reasoning:) 135.65 358.95 T
(\245) 135.65 342.95 T
0.76 (Reasoning with networks or trees \050but excluding Bayesian networks - see) 171.65 342.95 P
(below\051.) 171.65 328.95 T
(\245) 135.65 312.95 T
(Reasoning with \324rules\325.) 171.65 312.95 T
-0.03 (The third article presents further examples showing how other kinds of PrbRs may) 135.65 296.95 P
(be understood as ICMAUS. These include:) 99.65 282.95 T
(1) 99.65 266.95 T
(Hypothetical \050\324what if\325\051 reasoning.) 135.65 266.95 T
(2) 99.65 250.95 T
(\324Indirection\325 in information retrieval.) 135.65 250.95 T
(3) 99.65 234.95 T
(Reasoning with default values.) 135.65 234.95 T
(4) 99.65 218.95 T
(Nonmonotonic reasoning.) 135.65 218.95 T
(5) 99.65 202.95 T
(Solving geometric analogy problems.) 135.65 202.95 T
(6) 99.65 186.95 T
3.36 (Causal reasoning and the phenomenon of \324explaining away\325: ICMAUS as a) 135.65 186.95 P
(possible alternative to Bayesian networks.) 135.65 172.95 T
0.36 (The article ends with a brief review of how these proposals may be evaluated and) 135.65 156.95 P
(some concluding remarks.) 99.65 142.95 T
-0.5 (Definitions of terms used in this article and the other two are presented in Appendix) 135.65 126.95 P
(A1.) 99.65 112.95 T
FMENDPAGE
%%EndPage: "2" 3
%%Page: "3" 3
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 3 -) 305.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 F
0 X
(1.2) 99.65 736.95 T
(\324Reasoning\325 and \324inference\325) 135.65 736.95 T
1 F
-0.49 (Before we proceed, some clarification is necessary for the meanings of the terms) 99.65 712.95 P
2 F
-0.49 (reasoning) 483.67 712.95 P
1 F
(and) 99.65 698.95 T
2 F
(inference) 119.96 698.95 T
1 F
( as they will be used in this article.) 164.59 698.95 T
2.2 (The term \324reasoning\325 will be used to mean any process of \322going beyond the) 135.65 682.95 P
-0.53 (information given\323. In most forms of \324deductive\325 reasoning, conclusions are either \324true\325 or) 99.65 668.95 P
0.3 (\324false\325, whereas in \324probabilistic reasoning\325 \050PrbRs\051, conclusions are rarely totally true or) 99.65 654.95 P
-0.25 (false but have some kind of \324probability\325 attaching to them, e.g. \322Black clouds mean that it) 99.65 640.95 P
(will probably rain\323.) 99.65 626.95 T
-0.37 (All-or-nothing \324deductive\325 reasoning as it appears in logic will not be considered in) 135.65 610.95 P
3.54 (this article, except in passing. However, given G\232del\325s demonstration that there is) 99.65 596.95 P
-0.44 (irreducible uncertainty associated with all but the simplest kinds of formal system, it seems) 99.65 582.95 P
3.43 (that deductive reasoning might usefully be treated as probabilistic reasoning where) 99.65 568.95 P
-0.21 (probability values are close to 0 or 1. Exploring the implications of that idea would take us) 99.65 554.95 P
(well beyond the scope of this article.) 99.65 540.95 T
-0.18 (As it is generally used, the term \324inference\325 has three distinct but related meanings:) 135.65 524.95 P
(\245) 99.65 508.95 T
0.13 (It is sometimes used to mean a process of constructing a grammar or other kind of) 135.65 508.95 P
(knowledge structure by \324induction\325 from a body of \324raw\325 data.) 135.65 494.95 T
(\245) 99.65 478.95 T
-0.26 (It is also often used with essentially the same meaning as probabilistic reasoning as) 135.65 478.95 P
(described above.) 135.65 464.95 T
(\245) 99.65 448.95 T
(It may also be used to refer to the) 135.65 448.95 T
2 F
(result) 299.19 448.95 T
1 F
( or) 326.51 448.95 T
2 F
(product) 342.5 448.95 T
1 F
( of a process of PrbRs.) 379.81 448.95 T
2.07 (Only the second and third of these three meanings will be used in this article) 135.65 432.95 P
0.49 (although it is anticipated that, in research still to be done, the ICMAUS concepts may be) 99.65 418.95 P
0.51 (extended with relatively little modification to model unsupervised grammatical inference) 99.65 404.95 P
(and similar processes of unsupervised inductive learning.) 99.65 390.95 T
1 10 Q
(1) 374.45 395.75 T
0 12 Q
(1.3) 99.65 356.95 T
(Background and context) 135.65 356.95 T
1 F
2.25 (The proposals in these articles have been developed within a programme of research) 99.65 332.95 P
(developing the \324SP\325 conjecture that:) 99.65 318.95 T
2 F
-0.31 (All kinds of computing and formal reasoning may usefully be understood as) 135.65 302.95 P
(information compression by multiple alignment, unification and search,) 135.65 288.95 T
1 F
(and developing a \324new generation\325 computing system based on this thinking.) 99.65 272.95 T
1 10 Q
(2) 468.41 277.75 T
1 12 Q
(A subsidiary conjecture, which is implied by the main conjecture, is that) 135.65 256.95 T
2 F
4.06 (All kinds of information compression may be understood as pattern) 135.65 240.95 P
99.65 212.95 531.65 227.93 C
99.65 212.95 531.65 227.93 R
7 X
0 K
V
108.65 225.91 252.65 225.91 2 L
V
0.5 H
2 Z
0 X
N
-8.35 24.95 603.65 816.95 C
1 12 Q
0 X
0 K
0.91 (1. As noted in Section 1.3, the application of ICMAUS concepts to learning has not yet) 99.65 204.95 P
0.18 (been seriously attempted but this entire research programme originated in earlier research) 99.65 190.95 P
1.15 (on language learning \050and other kinds of learning\051 based on similar principles \050see [54,) 99.65 176.95 P
(55]\051.) 99.65 162.95 T
0.23 (2. IC may be interpreted as a process of removing unnecessary \050redundant\051 complexity in) 99.65 146.95 P
-0.17 (information - and thus maximising) 99.65 132.95 P
2 F
-0.17 (simplicity) 268.36 132.95 P
1 F
-0.17 ( - whilst preserving as much as possible of its) 315 132.95 P
0.82 (non-redundant descriptive) 99.65 118.95 P
2 F
0.82 (power) 229.85 118.95 P
1 F
0.82 (. Hence the name \324SP\325 applied to the central conjecture) 259.84 118.95 P
(and other aspects of this research.) 99.65 104.95 T
FMENDPAGE
%%EndPage: "3" 4
%%Page: "4" 4
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 4 -) 305.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
2 F
0 X
(matching, unification and search.) 135.65 736.95 T
1 F
2.72 (Thus, as a working hypothesis, it is assumed that well-known techniques for) 135.65 720.95 P
0.41 (compressing information such as arithmetic coding, wavelet coding, the use of vectors to) 99.65 706.95 P
-0.53 (describe diagrams, and other techniques, may, ultimately, be understood in terms of pattern) 99.65 692.95 P
-0.32 (matching, unification and search. Whether or not this is true is a question which is likely to) 99.65 678.95 P
(remain open for some time.) 99.65 664.95 T
-0.17 (Background thinking for this research programme is described in [52] and [47]. To) 135.65 648.95 P
(date, the concepts have been developed in relation to the following fields:) 99.65 634.95 T
(\245) 99.65 618.95 T
-0.36 (The whole current research programme is based on an extensive earlier programme) 135.65 618.95 P
0.69 (of research into the) 135.65 604.95 P
0 F
0.69 (unsupervised inductive learning of natural languages and) 233.64 604.95 P
-0.64 (non-linguistic cognitive structures) 135.65 590.95 P
1 F
-0.64 ( \050see [53, 54, 55] and earlier work cited in those) 308.27 590.95 P
4.47 (sources\051. That said, the current SP model has not yet been developed to) 135.65 576.95 P
0.11 (accommodate learning. This is planned as the next main phase in the development) 135.65 562.95 P
(of the model) 135.65 548.95 T
(\245) 99.65 532.95 T
0 F
2.69 (Probabilistic Inference) 135.65 532.95 P
1 F
2.69 (. These three articles are the main source to date but) 255.26 532.95 P
-0.64 (relevant ideas have been developed in [43], \050a discussion of causal inferences in this) 135.65 518.95 P
2.86 (framework\051, [44] \050\324logic\325 as ICMAUS\051 and [46] \050ICMAUS as an integrating) 135.65 504.95 P
(framework for different kinds of inference\051.) 135.65 490.95 T
(\245) 99.65 474.95 T
0 F
4.32 (Best-Match Information Retrieval and Pattern Recognition) 135.65 474.95 P
1 F
4.32 (. Article [50]) 460.72 474.95 P
0.18 (describes SP21 which is essentially a version of dynamic programming which can) 135.65 460.95 P
4.09 (find good partial matches between two patterns and which has advantages) 135.65 446.95 P
(compared with standard forms of dynamic programming.) 135.65 432.95 T
(\245) 99.65 416.95 T
0 F
0.86 (Multi-Level Parsing of Natural and Artificial Languages) 135.65 416.95 P
1 F
0.86 (. A recent version of) 429.97 416.95 P
0.14 (the model \050SP52\051, described in [42] has been developed as a model of \324parsing\325 in) 135.65 402.95 P
2.29 (the sense understood in theoretical linguistics and natural language processing) 135.65 388.95 P
-0 (where a one-dimensional \324sentence\325 or other input pattern is analysed into labelled) 135.65 374.95 P
2.96 (parts, subparts, subsubparts etc at arbitrarily many \324levels\325 in an hierarchical) 135.65 360.95 P
(grammar.) 135.65 346.95 T
(\245) 99.65 330.95 T
0 F
(Automation of software design) 135.65 330.95 T
1 F
( and the) 292.55 330.95 T
0 F
(execution of software functions) 333.52 330.95 T
1 F
( [51].) 492.4 330.95 T
(\245) 99.65 314.95 T
0.97 (It has been argued [45] that) 135.65 314.95 P
0 F
0.97 (the ICMAUS model may be seen as a \324universal\325) 276.03 314.95 P
-0.43 (model of computing) 135.65 300.95 P
1 F
-0.43 ( which may be regarded as an interpretation and augmentation) 236.74 300.95 P
-0.31 (of the Turing \324universal\325 model of computing and equivalent models such as Post\325s) 135.65 286.95 P
(\324Canonical System\325.) 135.65 272.95 T
(\245) 99.65 256.95 T
1.28 (Although it has not yet been the subject of any article or report in this research) 135.65 256.95 P
-0.34 (programme, it should be said that the whole framework of ideas embraces) 135.65 242.95 P
0 F
-0.34 (a theory) 489.68 242.95 P
(of knowledge representation) 135.65 228.95 T
1 F
(. In brief, this theory is that:) 280.89 228.95 T
(\245) 135.65 212.95 T
1.3 (All kinds of knowledge may be represented by) 171.65 212.95 P
2 F
1.3 (patterns) 409.2 212.95 P
1 F
1.3 ( \050of one or more) 448.52 212.95 P
(dimensions\051 as defined in Appendix A1.) 171.65 198.95 T
(\245) 135.65 182.95 T
1.72 (Directly or indirectly, all kinds of knowledge about the \324world\325 may be) 171.65 182.95 P
(derived by ICMAUS from \324raw\325 data about the \324world\325.) 171.65 168.95 T
0.48 (Regarding the last point, the way in which the structure of natural languages may) 135.65 152.95 P
1.04 (be represented as patterns is described in [42]. Many of the ideas in that article may be) 99.65 138.95 P
0.94 (applied to the representation of non-linguistic knowledge. Article [40] shows how some) 99.65 124.95 P
1.05 (other kinds of knowledge structure \050networks, trees and rules; and class hierarchies and) 99.65 110.95 P
FMENDPAGE
%%EndPage: "4" 5
%%Page: "5" 5
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 5 -) 305.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
3.56 (heterarchies with inheritance of attributes\051 may be accommodated in the ICMAUS) 99.65 736.95 P
(framework.) 99.65 722.95 T
0.43 (Again, with regard to the idea that all emperical knowledge is derived from \324raw\325) 135.65 706.95 P
0.73 (data by inductive learning, an implication is that, in using any kind of knowledge which) 99.65 692.95 P
0.02 (purports to be empirical, we should take account of the fact that this knowledge is derived) 99.65 678.95 P
1.77 (from raw data which had spatial or temporal dimensions, or both. Some or all of this) 99.65 664.95 P
0.44 (information about spatial and/or temporal dimensions may have been lost \050e.g., when 2D) 99.65 650.95 P
0.47 (information is approximated using 1D patterns\051 but we should avoid using the abstracted) 99.65 636.95 P
0.13 (information in any way that is inconsistent with the 2-, 3-, or 4-dimensional origins of the) 99.65 622.95 P
(information.) 99.65 608.95 T
0 F
(1.3.1) 99.65 574.95 T
(Potential benefits of integration) 135.65 574.95 T
1 F
3.23 (If, as conjectured, it proves possible to understand a wide variety of concepts and) 99.65 550.95 P
0.28 (phenomena in cognition and computing in terms of ICMAUS, this should bring the kinds) 99.65 536.95 P
(of benefits which one would expect from any \050good\051 scientific theory:) 99.65 522.95 T
(\245) 99.65 506.95 T
2.66 (Simplification of one\325s thinking about the phenomena covered by the theory.) 135.65 506.95 P
0.25 (Instead of using a multiplicity of concepts which have little or no connection with) 135.65 492.95 P
0.15 (each other, one need deal only with the concepts in the theory and these should be) 135.65 478.95 P
(relatively simple.) 135.65 464.95 T
(\245) 99.65 448.95 T
0.25 (New insights and predictions of phenomena beyond the scope of the concepts and) 135.65 448.95 P
1.17 (observations from which the theory was derived. Some of these possibilities are) 135.65 434.95 P
(discussed in Chapter 6 of [53].) 135.65 420.95 T
(In terms of practicalities, the anticipated benefits of an SP system include:) 135.65 404.95 T
(\245) 99.65 388.95 T
2.42 (The expectation that the relatively sophisticated search mechanisms in the SP) 135.65 388.95 P
0.2 (system will be applicable across a wide range of applications thus saving the need) 135.65 374.95 P
1.2 (for the re-programming of equivalent mechanisms in each of those applications.) 135.65 360.95 P
(This means:) 135.65 346.95 T
(\245) 135.65 330.95 T
(Lower costs in developing and maintaining diverse systems.) 171.65 330.95 T
(\245) 135.65 314.95 T
5.13 (Greater \324quality\325, especially reliability, because the opportunities to) 171.65 314.95 P
0.3 (introduce bugs will be reduced and there is a better chance of weeding out) 171.65 300.95 P
(bugs in a single core system instead several diverse systems.) 171.65 286.95 T
(\245) 99.65 270.95 T
-0.12 (Provision of a generic core for diverse applications should facilitate the integration) 135.65 270.95 P
(of those applications and should reduce incompatibilities between applications.) 135.65 256.95 T
0 F
(1.4) 99.65 222.95 T
(Related research and novelty of the present proposals) 135.65 222.95 T
1 F
0.23 (As an attempt to integrate concepts across several areas of computing, the SP programme) 99.65 198.95 P
3.06 (naturally has many connections with other research. Some of these connections are) 99.65 184.95 P
(described in [53, 52, 48].) 99.65 170.95 T
0.25 (The closest links are with work on Algorithmic Information Theory \050AIT, see, for) 135.65 154.95 P
0.2 (example, [22]\051 and Minimum Length Encoding \050MLE\051. The latter is an umbrella term for) 99.65 140.95 P
0.36 (Minimum Description Length encoding \050MDL\051 and Minimum Message Length encoding) 99.65 126.95 P
0.83 (\050MML\051 - see, for example, [31, 37, 28, 4, 13]. MLE is itself closely related to Bayesian) 99.65 112.95 P
FMENDPAGE
%%EndPage: "5" 6
%%Page: "6" 6
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 6 -) 305.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(inference \050see, for example, [7, 26]\051.) 99.65 736.95 T
0 F
(1.4.1) 99.65 702.95 T
(The key idea in AIT and MLE) 135.65 702.95 T
1 F
-0.19 (The MLE principle may be regarded as a relatively precise statement of Occam\325s Razor. It) 99.65 678.95 P
2.25 (appears that the MLE principle was first proposed by Solomonoff [31] in relation to) 99.65 664.95 P
3.33 (\324inductive inference\325, meaning the unsupervised \324learning\325 of a grammar or similar) 99.65 650.95 P
0.11 (knowledge structure from \324raw\325 data \050see Section 1.2\051. In that context, the key idea is that) 99.65 636.95 P
2.18 (learning \050\324inference\325\051 may be seen as lossless compression of \324raw\325 data and that,) 99.65 622.95 P
3 F
2.18 (in) 521.65 622.95 P
0.61 (measuring compression, we must measure the size of the raw data in its encoded form) 99.65 608.95 P
(together with the size of the code patterns which are needed to create the encoding) 99.65 594.95 T
1 F
(.) 505.43 594.95 T
0.08 (Using the total of code size \050C\051 and size of encoded data \050D) 135.65 578.95 P
1 10 Q
0.06 (C) 423.01 575.95 P
1 12 Q
0.08 (\051 appears to give us a) 429.68 578.95 P
2.15 (handle on some subtle problems in inductive inference and in the related problem of) 99.65 564.95 P
1.07 (understanding how children learn a first language. With regard to the latter, we need to) 99.65 550.95 P
-0.3 (understand how children can learn to discriminate \324correct\325 generalisations of grammatical) 99.65 536.95 P
-0.39 (rules from incorrect generalisations \050e.g., \322I was hitted\323\051 when, by definition, both kinds of) 99.65 522.95 P
0.34 (generalisation have zero frequency in the child\325s experience. Given that there is evidence) 99.65 508.95 P
2.28 (that children do not depend on adults for the correction of overgeneralisations, MLE) 99.65 494.95 P
0.56 (provides a neat explanation of how the two kinds of generalisation can be discriminated:) 99.65 480.95 P
0.3 (\324correct\325 generalisations yield an overall reduction in \050C + D) 99.65 466.95 P
1 10 Q
0.25 (C) 393.26 463.95 P
1 12 Q
0.3 (\051 via a reduction in the size) 399.92 466.95 P
1.33 (of C without a corresponding increase in D) 99.65 452.95 P
1 10 Q
1.11 (C) 315.82 449.95 P
1 12 Q
1.33 (, while \324incorrect\325 generalisations yield an) 322.48 452.95 P
0.97 (overall increase in \050C + D) 99.65 438.95 P
1 10 Q
0.81 (C) 228.84 435.95 P
1 12 Q
0.97 (\051 via a reduction in C which is more than outweighed by an) 235.51 438.95 P
(increase in D) 99.65 424.95 T
1 10 Q
(C) 162.93 421.95 T
1 12 Q
( \050see [54]\051.) 169.59 424.95 T
0 F
(1.4.2) 99.65 390.95 T
(Research on probabilistic reasoning) 135.65 390.95 T
1 F
0.64 (There is now a huge literature on PrbRs. A well-known authoritative survey of the field,) 99.65 366.95 P
0.35 (with an emphasis on Bayesian networks, is provided by [24] although this source is now,) 99.65 352.95 P
(perhaps, in need of updating. A useful review from the same year is [17].) 99.65 338.95 T
-0.49 (A more recent collection of articles, which together provide a broad coverage of the) 135.65 322.95 P
1.97 (subject, appears in [13]. A relatively short but useful review of \322uncertainty handling) 99.65 308.95 P
0.12 (formalisms\323 is provided by [24]. Regarding the application of different kinds of \324logic\325 to) 99.65 294.95 P
-0.28 (nonmonotonic and uncertain reasoning, there is a mine of useful information in the articles) 99.65 280.95 P
1.33 (in [13] covering such things as \324default logic\325, \324autoepistemic logic\325, \324circumscription\325,) 99.65 266.95 P
-0.15 (\324defeasible logic\325, \324uncertainty logics\325 and \324possibilistic logic\325. In that volume, the chapter) 99.65 252.95 P
1.87 (by Ginsberg [16] provides an excellent introduction to the problems of nonmonotonic) 99.65 238.95 P
(reasoning.) 99.65 224.95 T
(Other recent articles in this area include [31], [20], [32], [20], [8] and [5].) 135.65 208.95 T
0 F
(1.4.3) 99.65 174.95 T
(Information compression and probabilistic reasoning) 135.65 174.95 T
1 F
1.78 (Naturally enough, much of the literature on probabilistic reasoning deals directly with) 99.65 150.95 P
-0.19 (concepts of probability, especially conditional probability. Since, however, there is a close) 99.65 136.95 P
-0.37 (connection between probability and compression \050mediated by coding schemes such as the) 99.65 122.95 P
2.58 (Huffman coding scheme - see, for example, [8] - or the Shannon-Fano-Elias coding) 99.65 108.95 P
FMENDPAGE
%%EndPage: "6" 7
%%Page: "7" 7
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 7 -) 305.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(scheme,) 99.65 736.95 T
2 F
(ibid) 141.61 736.95 T
1 F
(.\051, concepts of probability imply corresponding concepts of compression.) 160.27 736.95 T
0.39 (That said, a primary emphasis on compression rather than probability provides an) 135.65 720.95 P
1.72 (alternative perspective on the subject which may prove useful. Apart from the present) 99.65 706.95 P
0.24 (work, relevant sources in that connection include [31], [37], [38], [36], [22], [9], [17] and) 99.65 692.95 P
([18].) 99.65 678.95 T
0 F
(1.4.4) 99.65 644.95 T
(Distinctive features of this research) 135.65 644.95 T
1 F
-0.24 (In relation to other research in PrbRs, the most distinctive features of the present proposals) 99.65 620.95 P
(appear to be:) 99.65 606.95 T
(\245) 99.65 590.95 T
0.4 (Part of the foundation of this research is the conjecture that all kinds of regularity) 135.65 590.95 P
0.69 (\050redundancy\051 in the world may be expressed as) 135.65 573.48 P
2 F
0.69 (patterns) 370.3 573.48 P
1 10 Q
0.57 (3) 409.62 578.29 P
1 12 Q
0.69 ( of entities or events \050in) 414.61 573.48 P
1.54 (one or more dimensions\051 which co-occur with a relatively high frequency [47].) 135.65 559.48 P
-0.5 (Coupled with the idea that all kinds of redundancy may be reduced or eliminated by) 135.65 545.48 P
0.43 (the unification of matching patterns of symbols \050including subsequences of larger) 135.65 531.48 P
-0.04 (patterns which may be coherent or discontinuous within those larger patterns\051, this) 135.65 517.48 P
(conjecture seems to be distinctive of the research programme.) 135.65 503.49 T
(\245) 99.65 487.49 T
2.69 (The derivation of all probability values from absolute frequencies of patterns) 135.65 487.49 P
-0.59 (contrasts with the storage of conditional probabilities which is a feature of Bayesian) 135.65 473.49 P
(networks and other systems for PrbRs \050see, for example, [25]\051.) 135.65 459.49 T
(\245) 99.65 443.49 T
1.45 (The idea that ICMAUS may provide a framework for understanding PrbRs and) 135.65 443.49 P
1.69 (integrating different kinds of PrbRs appears to be entirely novel. It seems that,) 135.65 429.49 P
(currently, no other researchers are developing this idea.) 135.65 415.49 T
(\245) 99.65 399.49 T
1.63 (The idea that, in addition to integrating different kinds of PrbRs, the ICMAUS) 135.65 399.49 P
-0.55 (framework may integrate concepts of PrbRs with several other kinds of \322computing) 135.65 385.49 P
(and formal reasoning\323 seems to be distinctive of this research.) 135.65 371.49 T
(\245) 99.65 355.49 T
-0.34 (The concept of MA as it has been developed in this research differs, in a non-trivial) 135.65 355.49 P
(way, from MA as it is normally conceived in bio-informatics \050see Section 2.1\051.) 135.65 341.49 T
0 F
(1.5) 99.65 307.49 T
(Scope and orientation of these articles and of the research programme) 135.65 307.49 T
1 F
-0.74 (The primary purpose of these three articles is to present the ICMAUS proposals and attempt) 99.65 283.49 P
2.75 (to demonstrate their usefulness for understanding PrbRs. Space does not permit any) 99.65 269.49 P
1.16 (detailed comparison between the ICMAUS proposals for PrbRs and alternative systems) 99.65 255.49 P
1 (such as Markov or Bayesian networks, the Dempster-Shafer theory, and others \050see, for) 99.65 241.49 P
(example, [13]; [24]\051.) 99.65 227.49 T
3.3 (It cannot be emphasised too strongly that) 135.65 211.49 P
3 F
3.3 (the overall goal of this research) 358.92 211.49 P
3.93 (programme is the integration of diverse ideas across subfields of cognition and) 99.65 197.49 P
2.63 (computing) 99.65 183.49 P
1 F
2.63 (. As with any good theory in science, the unifying framework which this) 152.3 183.49 P
-0.34 (research aims to develop should be as) 99.65 169.49 P
3 F
-0.34 (simple) 281.49 169.49 P
1 F
-0.34 ( as possible, without sacrificing descriptive or) 313.48 169.49 P
0.19 (explanatory) 99.65 155.49 P
3 F
0.19 (power) 159.45 155.49 P
1 F
0.19 (. Thus) 189.44 155.49 P
2 F
0.19 (simplicity) 222.79 155.49 P
1 F
0.19 ( and) 269.43 155.49 P
2 F
0.19 (power) 293.12 155.49 P
1 F
0.19 ( are the mainstay of both the SP theory and) 323.11 155.49 P
(its evaluation.) 99.65 141.49 T
99.65 112.95 531.65 127.93 C
99.65 112.95 531.65 127.93 R
7 X
0 K
V
108.65 125.91 252.65 125.91 2 L
V
0.5 H
2 Z
0 X
N
-8.35 24.95 603.65 816.95 C
1 12 Q
0 X
0 K
(3. For definitions of terms, see Appendix A1.) 99.65 104.95 T
FMENDPAGE
%%EndPage: "7" 8
%%Page: "8" 8
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 8 -) 305.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 F
0 X
(1.6) 99.65 736.95 T
(Theory and applications) 135.65 736.95 T
1 F
0.87 (Although the abstract model which is being developed is expressed in a software model) 99.65 712.95 P
-0.03 (\050SP61, described in Section 6\051 the primary purpose in developing this working system has) 99.65 698.95 P
1.38 (been to provide a \324test bed\325 for the abstract model and a vehicle for demonstrating the) 99.65 684.95 P
0.02 (concepts as they have been developed to date. SP61 is) 99.65 670.95 P
2 F
0.02 (not) 362.63 670.95 P
1 F
0.02 ( intended as a system to be used) 377.96 670.95 P
(in practical applications and should not be evaluated as such.) 99.65 656.95 T
0.51 (Although the primary focus of the current work is the development of an abstract) 135.65 640.95 P
-0.1 (model, the need to avoid insuperable obstacles to practical applications in the future has at) 99.65 626.95 P
(all times been born in mind.) 99.65 612.95 T
0.53 (In particular, the development of the model has, at all times, been conditioned by) 135.65 596.95 P
-0.71 (the need to ensure that the computational processes in the model would be scaleable to large) 99.65 582.95 P
-0.1 (applications. As noted in Section 6.1, the estimated \324big O\325 values for the time complexity) 99.65 568.95 P
1.31 (and space complexity of the model are satisfactory, especially if high levels of parallel) 99.65 554.95 P
(processing are applied.) 99.65 540.95 T
1.86 (As indicated above, it is envisaged that mature versions of the SP system will) 135.65 524.95 P
-0.49 (exploit high levels of parallel processing to ensure that there is sufficient power to drive the) 99.65 510.95 P
0.24 (relatively \324expensive\325 search for \324good\325 matches which is at the heart of the model. At all) 99.65 496.95 P
0.73 (times, this objective has been kept in view to ensure that the model can be implemented) 99.65 482.95 P
(within a high-parallel environment when that is required.) 99.65 468.95 T
0 F
(2) 99.65 434.95 T
(Multiple alignment problems) 135.65 434.95 T
1 F
1.57 (The term) 99.65 410.95 P
2 F
1.57 (multiple alignment) 149.42 410.95 P
1 F
1.57 ( is normally associated with the computational analysis of) 241.27 410.95 P
-0.58 (\050symbolic representations of\051 sequences of DNA bases or sequences of amino acid residues) 99.65 396.95 P
4.33 (as part of the process of elucidating the structure, functions or evolution of the) 99.65 382.95 P
-0.19 (corresponding molecules. The aim of the computation is to find one or more alignments of) 99.65 368.95 P
0.94 (matching symbols in two or more sequences which are, in some sense, \324good\325. Possible) 99.65 354.95 P
0.35 (meanings for that term are discussed in Section 4, below. An example of an alignment of) 99.65 340.95 P
(DNA sequences is shown in Figure 1.) 99.65 326.95 T
(--------------------------------------------------------------------) 135.65 310.95 T
(Figure 1 about here) 171.65 294.95 T
(--------------------------------------------------------------------) 135.65 278.95 T
0 F
(Figure 1) 135.65 254.95 T
1 F
-0.67 (A \324good\325 alignment amongst five DNA sequences \050adapted from Fig. 6 in [29], with) 135.65 238.95 P
(permission from Oxford University Press\051.) 135.65 224.95 T
2.99 (In this area of research, it is widely recognised that the number of possible) 135.65 200.95 P
2.22 (alignments of symbols is normally too large to be searched exhaustively and that, to) 99.65 186.95 P
0.1 (achieve a search which has acceptable speed and acceptable scaling properties, \324heuristic\325) 99.65 172.95 P
-0.47 (techniques must normally be used. Heuristic techniques include \324hill climbing\325 \050sometimes) 99.65 158.95 P
1.79 (called \324descent\325\051, \324beam search\325, \324genetic algorithms\325, \324simulated annealing\325, \324dynamic) 99.65 144.95 P
2.38 (programming\325 and others. With these techniques, searching is done in stages, with a) 99.65 130.95 P
0.87 (progressive narrowing of the search in successive stages using some kind of measure of) 99.65 116.95 P
-0.56 (goodness of alignments to guide the search. These techniques may be described generically) 99.65 102.95 P
FMENDPAGE
%%EndPage: "8" 9
%%Page: "9" 9
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 9 -) 305.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(as \324metrics-guided search\325.) 99.65 736.95 T
4.99 (With these techniques, ideal solutions cannot normally be guaranteed but) 135.65 720.95 P
4.96 (acceptably good approximate solutions can normally be found without excessive) 99.65 706.95 P
(computational demands.) 99.65 692.95 T
1.02 (There is now a fairly large literature about methods for finding good alignments) 135.65 676.95 P
-0.24 (amongst two or more sequences of symbols. Some of the existing methods are reviewed in) 99.65 662.95 P
-0.13 ([35, 3, 5, 11]. Because of the way in which the concept of MA has been generalised in this) 99.65 648.95 P
3.46 (research \050see next\051, none of the current methods for finding MAs are suitable for) 99.65 634.95 P
2.69 (incorporation in the proposed SP system. Hence the development of a new method,) 99.65 620.95 P
(outlined in Section 6) 99.65 606.95 T
0 F
(2.1) 99.65 572.95 T
(Generalisation of the concept of multiple alignment) 135.65 572.95 T
1 F
(In this research, the concept of MA has been generalised in the following way:) 99.65 548.95 T
(1) 99.65 532.95 T
-0.06 (One \050or more\051 of the sequences of symbols to be aligned has a special status and is) 135.65 532.95 P
0.52 (designated as \324New\325. The way in which the concept of \324New\325 appears to relate to) 135.65 518.95 P
(established concepts in computing is shown in Table 1.) 135.65 504.95 T
(2) 99.65 488.95 T
-0.33 (All other sequences are designated as \324Old\325. The way in which the concept of \324Old\325) 135.65 488.95 P
(appears to relate to established concepts in computing is also shown in Table 1.) 135.65 474.95 T
(3) 99.65 458.95 T
0.28 (A \324good\325 alignment is one which, through the unification of symbols in New with) 135.65 458.95 P
0.81 (symbols in Old, and through unifications amongst the symbols in Old, leads to a) 135.65 444.95 P
0.69 (relatively large amount of compression of New in terms of the sequences in Old.) 135.65 430.95 P
(How this may be done is explained in Section 4, below.) 135.65 416.95 T
(4) 99.65 400.95 T
0.84 (An implication of this way of framing the alignment problem is that, by contrast) 135.65 400.95 P
1.65 (with \324multiple alignment\325 as normally understood in bio-informatics, any given) 135.65 386.95 P
1.54 (sequence in Old may appear two or more times in any one alignment and may) 135.65 372.95 P
0.52 (therefore be aligned with itself \050with the obvious restriction that any one instance) 135.65 358.95 P
(of a symbol may not be aligned with itself\051.) 135.65 341.49 T
1 10 Q
(4) 345.18 346.29 T
99.65 238.95 531.65 253.93 C
99.65 238.95 531.65 253.93 R
7 X
0 K
V
108.65 251.91 252.65 251.91 2 L
V
0.5 H
2 Z
0 X
N
-8.35 24.95 603.65 816.95 C
1 12 Q
0 X
0 K
0.01 (4. With the kind of MA shown in Figure 1, it is obviously possible to include two or more) 99.65 230.95 P
2 F
0.23 (copies) 99.65 216.95 P
1 F
0.23 (of a given sequence in any one alignment. To my knowledge, this is never done in) 133.52 216.95 P
0.26 (practice because it would simply lead to the trivial alignment of each symbol in one copy) 99.65 202.95 P
2.28 (with the corresponding symbol or symbols in the one or more other copies. What is) 99.65 188.95 P
-0.55 (proposed for the ICMAUS framework is different: any) 99.65 174.95 P
2 F
-0.55 (one) 361.07 174.95 P
1 F
-0.55 ( pattern may appear two or more) 378.39 174.95 P
0.04 (times in an alignment. Each appearance is just that, it is an) 99.65 160.95 P
2 F
0.04 (appearance) 383.93 160.95 P
1 F
0.04 ( of) 440.55 160.95 P
2 F
0.04 (one) 456.62 160.95 P
1 F
0.04 ( pattern, not) 473.94 160.95 P
1.96 (a duplicate) 99.65 146.95 P
2 F
1.96 (copy) 158.85 146.95 P
1 F
1.96 ( of a pattern. Since each appearance of a pattern represents the same) 181.49 146.95 P
-0 (pattern, it makes no sense to match a symbol from one appearance with the corresponding) 99.65 132.95 P
-0.41 (symbol in another appearance because this is simply matching one instance of symbol with) 99.65 118.95 P
(itself. Any such match is spurious and must be forbidden.) 99.65 104.95 T
FMENDPAGE
%%EndPage: "9" 10
%%Page: "10" 10
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 10 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
2 F
0 X
(Area of) 135.65 736.95 T
(New) 243.65 736.95 T
(Old) 387.65 736.95 T
(application) 135.65 721.95 T
1 F
(Unsupervised) 135.65 691.95 T
(\324Raw\325 data.) 243.65 691.95 T
(Grammar or other) 387.65 691.95 T
(inductive learning) 135.65 676.95 T
(knowledge structure) 387.65 676.95 T
(created by learning.) 387.65 661.95 T
(Parsing) 135.65 631.95 T
(The sentence to be) 243.65 631.95 T
(parsed) 336.24 631.95 T
(.) 367.54 631.95 T
(The grammar used for) 387.65 631.95 T
(parsing.) 387.65 616.95 T
(Pattern) 135.65 586.95 T
(A pattern to be) 243.65 586.95 T
(The stored knowledge used) 387.65 586.95 T
(recognition) 135.65 571.95 T
(recognised.) 243.65 571.95 T
(to recognise one pattern) 387.65 571.95 T
(and scene) 135.65 556.95 T
(or scene to be) 243.65 556.95 T
(or several within a scene.) 387.65 556.95 T
(analysis) 135.65 541.95 T
(analysed.) 243.65 541.95 T
(Databases) 135.65 511.95 T
(A \324query\325 in SQL or other) 243.65 511.95 T
(Records stored in the) 387.65 511.95 T
(query language.) 243.65 496.95 T
(database.) 387.65 496.95 T
(Expert) 135.65 466.95 T
(A \324query\325 in the query) 243.65 466.95 T
(The \324rules\325 or other) 387.65 466.95 T
(system) 135.65 451.95 T
(language for the expert) 243.65 451.95 T
(knowledge stored in) 387.65 451.95 T
( system.) 243.65 436.95 T
(the expert system.) 387.65 436.95 T
(Computer) 135.65 406.95 T
(The \324data\325 or) 243.65 406.95 T
( \324parameters\325) 306.25 406.95 T
(The computer program) 387.65 406.95 T
(program) 135.65 391.95 T
(supplied to the program) 243.65 391.95 T
(itself.) 387.65 391.95 T
(on each run.) 243.65 376.95 T
0 F
(Table 1) 135.65 352.95 T
1 F
-0.09 (The way in which the concepts of \324New\325 and \324Old\325 in this research appear to relate) 135.65 336.95 P
(to established concepts in computing.) 135.65 322.95 T
0 F
(2.1.1) 99.65 288.95 T
(Higher dimensions) 135.65 288.95 T
1 F
-0 (It should be clear that this concept of MA \050and the bio-informatics version of the concept\051) 99.65 264.95 P
1.46 (may be generalised to two-dimensional \050or even higher-dimensional\051 patterns. There is) 99.65 250.95 P
-0.35 (likely to be a case, at some stage in the SP programme, for extending the ideas described in) 99.65 236.95 P
(this article into the domain of two or more dimensions.) 99.65 222.95 T
0 F
(3) 99.65 188.95 T
(A simple example) 135.65 188.95 T
1 F
1.6 (The main concepts to be presented can probably best be described with reference to a) 99.65 164.95 P
2.14 (simple example. Since \324parsing\325 in the sense understood in theoretical linguistics and) 99.65 150.95 P
1.69 (natural language processing has come to be a paradigm for the several concepts to be) 99.65 136.95 P
-0.27 (described, it will provide our first example despite the fact that, when the input sentence or) 99.65 122.95 P
0.13 (phrase to be parsed is complete, there is no significant PrbRs as understood in this article.) 99.65 108.95 P
FMENDPAGE
%%EndPage: "10" 11
%%Page: "11" 11
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 11 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
0.73 (Sections 3.1 and 3.2 show how the parsing of a) 99.65 736.95 P
2 F
0.73 (very) 336.1 736.95 P
1 F
0.73 ( simple sentence with a) 356.74 736.95 P
2 F
0.73 (very) 475.3 736.95 P
1 F
0.73 ( simple) 495.93 736.95 P
-0.27 (grammar may be understood as MA. Much more elaborate examples can be found in [42].) 99.65 722.95 P
0 F
(3.1) 99.65 688.95 T
(Representing a grammar with patterns of symbols) 135.65 688.95 T
1 F
1.5 (Figure 2 shows a simple context-free phrase-structure grammar \050CF-PSG\051 describing a) 99.65 664.95 P
0.12 (fragment of the syntax of English. This grammar generates the four sentences \324) 99.65 650.95 P
4 F
0.28 (j o h n) 480.43 650.95 P
-0.17 (r u n s) 99.65 636.95 P
1 F
-0.07 (\325, \324) 149.5 636.95 P
4 F
-0.17 (j o h n w a l k s) 163.41 636.95 P
1 F
-0.07 (\325, \324) 284.35 636.95 P
4 F
-0.17 (s u s a n r u n s) 298.26 636.95 P
1 F
-0.07 (\325, and \324) 419.2 636.95 P
4 F
-0.17 (s u s a n w) 453.36 636.95 P
-1.38 (a l k s) 99.65 622.95 P
1 F
-0.57 (\325. Any of these sentences may be parsed in terms of the grammar giving a labelled) 145.89 622.95 P
(bracketing like this:) 99.65 608.95 T
4 F
(\050S\050N j o h n\051\050V r u n s\051) 135.65 592.95 T
1 F
(\051) 308.35 592.95 T
(or an equivalent representation in the form of a tree.) 99.65 576.95 T
4 F
(S) 135.65 552.95 T
5 F
(\256) 145.84 552.95 T
4 F
(N V) 160.68 552.95 T
(N) 135.65 537.95 T
5 F
(\256) 145.84 537.95 T
4 F
(j o h n) 160.68 537.95 T
(N) 135.65 522.95 T
5 F
(\256) 145.84 522.95 T
4 F
(s u s a n) 160.68 522.95 T
(V) 135.65 507.95 T
5 F
(\256) 145.84 507.95 T
4 F
(w a l k s) 160.68 507.95 T
(V) 135.65 492.95 T
5 F
(\256) 145.84 492.95 T
4 F
(r u n s) 160.68 492.95 T
0 F
(Figure 2) 135.65 468.95 T
1 F
(A CF-PSG describing a fragment of English syntax.) 135.65 452.95 T
-0.15 (Figure 3 shows the grammar from Figure 2 expressed as a set of) 135.65 428.95 P
2 F
-0.15 (strings) 444.52 428.95 P
1 F
-0.15 (,) 477.18 428.95 P
2 F
-0.15 (sequences) 483.03 428.95 P
1 F
-0.61 (or) 99.65 414.95 P
2 F
-0.61 (patterns) 112.03 414.95 P
1 F
-0.61 ( of) 151.35 414.95 P
2 F
-0.61 (symbols) 166.12 414.95 P
1 F
-0.61 (.) 204.76 414.95 P
1 10 Q
-0.5 (5) 207.76 419.75 P
1 12 Q
-0.61 ( Each pattern in this \324grammar\325 is like a re-write rule in the CF-PSG) 212.76 414.95 P
0.69 (notation except that the rewrite arrow has been removed, some other symbols have been) 99.65 400.95 P
-0.06 (introduced \050\324) 99.65 386.95 P
4 F
-0.14 (0) 161.87 386.95 P
1 F
-0.06 (\325, \324) 169.07 386.95 P
4 F
-0.14 (1) 182.99 386.95 P
1 F
-0.06 (\325 and symbols with an initial \324) 190.19 386.95 P
4 F
-0.14 (#) 333.76 386.95 P
1 F
-0.06 (\325 character\051 and there is a number to the) 340.95 386.95 P
(right of each rule.) 99.65 372.95 T
1 10 Q
(6,7,8) 184.91 377.75 T
1 12 Q
1.73 (The number to the right of each rule in Figure 3 is an imaginary frequency of) 135.65 356.95 P
-0.15 (occurrence of the rule in a parsing of a notional sample of the language. These frequencies) 99.65 342.95 P
(of occurrence will be discussed later.) 99.65 328.95 T
-0.12 (The reasons for the symbols which have been added to each rule will become clear) 135.65 312.95 P
99.65 284.95 531.65 299.93 C
99.65 284.95 531.65 299.93 R
7 X
0 K
V
108.65 297.91 252.65 297.91 2 L
V
0.5 H
2 Z
0 X
N
-8.35 24.95 603.65 816.95 C
1 12 Q
0 X
0 K
(5. For definitions of terms, see Appendix A1.) 99.65 276.95 T
1.2 (6. For the remainder of this article, quote marks will be dropped when referring to any) 99.65 260.95 P
-0.71 (grammar like that in Figure 3 which is expressed as patterns of symbols. Likewise, the word) 99.65 246.95 P
(\324rule\325 with respect to this kind of grammar will be referred to without quote marks.) 99.65 232.95 T
-0.41 (7. This example of a grammar and of how it is used in parsing may give the impression that) 99.65 216.95 P
0.53 (the ICMAUS framework is merely a trivial variation of familiar concepts of context-free) 99.65 202.95 P
0.37 (phrase-structure grammar \050CF-PSG\051 with their well-known inadequacies for representing) 99.65 188.95 P
0.33 (and analysing the \324context sensitive\325 structures found in natural languages. The examples) 99.65 174.95 P
0.82 (presented in [42] show that the ICMAUS framework is much more \324powerful\325 than CF-) 99.65 160.95 P
1.63 (PSGs and can accommodate quite subtle context-sensitive features of natural language) 99.65 146.95 P
0.33 (syntax in a simple and elegant manner. Achieving this expressive power with a relatively) 99.65 132.95 P
-0.09 (simple notation is made possible by the relatively sophisticated search processes which lie) 99.65 118.95 P
(at the heart of the SP model.) 99.65 104.95 T
FMENDPAGE
%%EndPage: "11" 12
%%Page: "12" 12
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 12 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
1.6 (but a few words of explanation are in order here. The symbols \324) 99.65 736.95 P
4 F
3.84 (0) 424.27 736.95 P
1 F
1.6 (\325 and \324) 431.47 736.95 P
4 F
3.84 (1) 465.97 736.95 P
1 F
1.6 (\325 have been) 473.17 736.95 P
0.19 (introduced to differentiate the two versions of the \324) 99.65 722.95 P
4 F
0.44 (N) 345.62 722.95 P
1 F
0.19 (\325 patterns and the two versions of the) 352.81 722.95 P
1.05 (\324) 99.65 708.95 P
4 F
2.53 (V) 103.64 708.95 P
1 F
1.05 (\325 patterns. They enter into matching and unification in exactly the same way as other) 110.84 708.95 P
1.38 (symbols. Although the symbols are the same as are used in other contexts to represent) 99.65 694.95 P
(numbers they do not have the meaning of numbers in this grammar.) 99.65 680.95 T
4 F
(S N #N V #V #S) 135.65 656.95 T
(1000) 351.65 656.95 T
(N 0 j o h n #N) 135.65 641.95 T
(300) 351.65 641.95 T
(N 1 s u s a n #N) 135.65 626.95 T
(700) 351.65 626.95 T
(V 0 w a l k s #V) 135.65 611.95 T
(650) 351.65 611.95 T
(V 1 r u n s #V) 135.65 596.95 T
(350) 351.65 596.95 T
0 F
(Figure 3) 135.65 572.95 T
1 F
0.07 (A simple grammar written as patterns of symbols. For each rule, there is a number) 135.65 556.95 P
2 (on the right representing the frequency of occurrence of the rule in a notional) 135.65 542.95 P
(sample of the language.) 135.65 528.95 T
3.1 (The symbols which begin with \324) 135.65 504.95 P
4 F
7.43 (#) 306.04 504.95 P
1 F
3.1 (\325 \050e.g., \324) 313.23 504.95 P
4 F
7.43 (#S) 357.72 504.95 P
1 F
3.1 (\325, \324) 372.11 504.95 P
4 F
7.43 (#NP) 389.2 504.95 P
1 F
3.1 (\325\051 serve as \324termination) 410.78 504.95 P
0.27 (markers\325 for patterns in the grammar. Although their informal description as \324termination) 99.65 490.95 P
-0.39 (markers\325 suggests that these symbols are meta symbols with special meaning, they have no) 99.65 476.95 P
(hidden meaning and they enter into matching and unification like every other symbol.) 99.65 462.95 T
1 10 Q
(9) 511.71 467.75 T
1 12 Q
0.42 (In general, all the symbols which can be seen in Figure 3 enter into matching and) 135.65 446.95 P
1.93 (unification in the same way. Although some of these symbols can be seen to serve a) 99.65 432.95 P
0.04 (distinctive role, there is no hidden meaning attached to any of them and there is no formal) 99.65 418.95 P
-0 (distinction between upper- and lower-case letters or between digit symbols and alphabetic) 99.65 404.95 P
(symbols and so on.) 99.65 390.95 T
1 10 Q
(10) 191.6 395.75 T
0 12 Q
(3.2) 99.65 356.95 T
(Parsing as alignment of a sentence and rules in a grammar) 135.65 356.95 T
1 F
0.16 (Figure 4 shows how a parsing of the sentence \324) 99.65 332.95 P
4 F
0.39 (j o h n r u n s) 326.6 332.95 P
1 F
0.16 (\325 may be seen as an) 437.26 332.95 P
1.19 (alignment of patterns which includes the sentence and relevant rules from the grammar) 99.65 318.95 P
-0.59 (shown in Figure 3. The similarity between this alignment and the conventional parsing may) 99.65 304.95 P
99.65 284.95 531.65 299.93 C
99.65 284.95 531.65 299.93 R
7 X
0 K
V
108.65 297.91 252.65 297.91 2 L
V
0.5 H
2 Z
0 X
N
-8.35 24.95 603.65 816.95 C
1 12 Q
0 X
0 K
0.55 (8. For the sake of clarity in exposition and to save space, all the grammars shown in this) 99.65 276.95 P
0.12 (article are much simpler than in any practical system. For similar reasons, all examples of) 99.65 262.95 P
-0.51 (MAs which are presented have been chosen so that they are small enough to fit on one page) 99.65 248.95 P
-0.05 (without resorting to font sizes which are too small to read. However, for the reasons given) 99.65 234.95 P
-0.6 (in Section 6.3, the model appears to be general and scalable to realistically large knowledge) 99.65 220.95 P
(structures and alignments.) 99.65 206.95 T
(9. See qualification of this assertion below.) 99.65 190.95 T
0.79 (10. The foregoing assertions are not strictly true of the method of evaluating alignments) 99.65 174.95 P
0.17 (which is used in the SP61 model \050see Appendix 5.5\051. The principle of \322no meta symbols\323) 99.65 160.95 P
0.24 (and thus \322no hidden meanings for symbols\323 is an ideal which this research aims to attain.) 99.65 146.95 P
1.83 (But, as a temporary solution to the problem of scoring alignments pending something) 99.65 132.95 P
0.14 (better, a distinction has been recognised in the method for scoring alignments in the SP61) 99.65 118.95 P
(model \050Section 4\051 between symbols which begin with \324) 99.65 104.95 T
4 F
(#) 364.81 104.95 T
1 F
(\325 and all other symbols.) 372.01 104.95 T
FMENDPAGE
%%EndPage: "12" 13
%%Page: "13" 13
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 13 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(be seen if the symbols in the alignment are \324projected\325 on to a single sequence, thus:) 99.65 736.95 T
4 F
(S N 0 j o h n #N V 1 r u n s #V) 135.65 720.95 T
1 F
( #S) 358.73 720.95 T
-0.49 (In this projection, the two instances of \324) 99.65 704.95 P
4 F
-1.18 (N) 286.73 704.95 P
1 F
-0.49 (\325 in the second column of the alignment have been) 293.93 704.95 P
-0.13 (merged or \324unified\325 and likewise for the two instances of \324) 99.65 690.95 P
4 F
-0.31 (#N) 378.12 690.95 P
1 F
-0.13 (\325 in the eighth column and so) 392.52 690.95 P
(on wherever there are two or more instances of a symbol in any column.) 99.65 676.95 T
(--------------------------------------------------------------------) 135.65 660.95 T
(Figure 4 about here) 171.65 644.95 T
(--------------------------------------------------------------------) 135.65 628.95 T
0 F
(Figure 4) 135.65 604.95 T
1 F
-0.29 (Parsing of the sentence \324) 135.65 588.95 P
4 F
-0.69 (j o h n r u n s) 252.41 588.95 P
1 F
-0.29 (\325 as an alignment amongst sequences) 355.54 588.95 P
(representing the sentence and relevant rules in the grammar in Figure 3.) 135.65 574.95 T
1.61 (This projection is the same as the conventional parsing except that \324) 135.65 550.95 P
4 F
3.85 (0) 478.75 550.95 P
1 F
1.61 (\325 and \324) 485.94 550.95 P
4 F
3.85 (1) 520.46 550.95 P
1 F
1.61 (\325) 527.65 550.95 P
0.89 (symbols are included, right bracket symbols \050\324) 99.65 536.95 P
4 F
2.14 (\051) 327.84 536.95 P
1 F
0.89 (\325\051 are replaced by \324termination markers\325) 335.04 536.95 P
-0.31 (and each of the upper-case symbols is regarded both as a \324label\325 for a structure and as a left) 99.65 522.95 P
(bracket for that structure.) 99.65 508.95 T
-0.49 (As was noted in Section 2.1, the sentence or other sequence of symbols to be parsed) 135.65 492.95 P
0.79 (is regarded as New, while the rules in the grammar are regarded as Old. For the sake of) 99.65 478.95 P
-0.05 (readability and ease of interpretation, New is normally placed at the top of each alignment) 99.65 464.95 P
(with patterns from Old below it.) 99.65 450.95 T
3.45 (For the sake of clarity in Figure 4 and other alignments shown later, each) 135.65 434.95 P
-0.38 (appearance of a pattern in any alignment is given a line to itself. Apart from the convention) 99.65 420.95 P
-0.26 (that New is always at the top, the order in which patterns appear \050from top to bottom of the) 99.65 406.95 P
0.24 (alignment\051 is entirely arbitrary. An alignment in which the patterns appear in one order is) 99.65 392.95 P
0.67 (entirely equivalent to an alignment in which they appear in any other order, provided all) 99.65 378.95 P
(other aspects of the alignment are the same.) 99.65 364.95 T
0 F
(4) 99.65 330.95 T
(Evaluation of an alignment in terms of IC) 135.65 330.95 T
1 F
2.49 (Intuitively, a good alignment is one which has many) 99.65 306.95 P
2 F
2.49 (hits) 376.6 306.95 P
1 F
2.49 ( \050positive matches between) 393.93 306.95 P
-0.02 (symbols\051, few) 99.65 292.95 P
2 F
-0.02 (gaps) 170.56 292.95 P
1 F
-0.02 ( \050sequences of one or more symbols which are not part of any hit\051 and,) 193.21 292.95 P
-0.28 (where there are gaps, they should be as short as possible. It is possible to use measures like) 99.65 278.95 P
-0.75 (these directly in computer programs for finding good MAs and, indeed, they commonly are.) 99.65 264.95 P
-0.55 (However, our confidence in the validity of measures like these may be increased if they can) 99.65 250.95 P
(be placed within a broader theoretical framework.) 99.65 236.95 T
1.78 (Concepts of information, IC and related concepts of probability may provide a) 135.65 220.95 P
0.69 (suitable framework. Work on the evaluation of MAs in this tradition includes [27], [12],) 99.65 206.95 P
([2], [5], [1] and [50].) 99.65 192.95 T
1.11 (As was indicated in Section 2.1,) 135.65 176.95 P
2 F
1.11 (a good alignment is defined here as one which) 299.55 176.95 P
(provides a basis for an economical coding of New in terms of the patterns Old) 99.65 162.95 T
1 F
(.) 475.44 162.95 T
1.48 (The compression method which is described in Section 4.2, below, exploits the) 135.65 146.95 P
1.41 (elementary principle that a \050relatively long\051 sequence of symbols which repeats two or) 99.65 132.95 P
0.6 (more times in a body of information may be replaced by a shorter \324code\325, \324codeword\325 or) 99.65 118.95 P
-0.12 (\324tag pattern\325 associated with that pattern in some kind of \324dictionary\325 of patterns. In effect,) 99.65 104.95 P
FMENDPAGE
%%EndPage: "13" 14
%%Page: "14" 14
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 14 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
0.09 (each instance of the pattern in the data is unified with the same pattern as it appears in the) 99.65 736.95 P
(repository of patterns. This is the basis of all standard methods for IC [34].) 99.65 722.95 T
(The present proposals differ from standard methods in two ways:) 135.65 706.95 T
(\245) 99.65 690.95 T
-0.03 (Each code serves a dual role: to identify the corresponding pattern uniquely within) 135.65 690.95 P
2.64 (the grammar, and to mark the left and right ends of the pattern. For present) 135.65 676.95 P
3.17 (purposes, the second role is required to remove the ambiguity which would) 135.65 662.95 P
(otherwise exist about left-to-right sequencing of symbols in alignments.) 135.65 645.49 T
1 10 Q
(11) 480.43 650.29 T
1 12 Q
(\245) 99.65 629.49 T
0.89 (In the present scheme, the coding principle may be applied through two or more) 135.65 629.49 P
-0.49 (\324levels\325 so that the symbols which encode a sequence of two or more patterns at one) 135.65 615.49 P
-0.1 (level may themselves be recognised as an instance of a recurrent pattern which has) 135.65 601.49 P
(its own code at the next higher level. Examples will be seen later.) 135.65 587.49 T
0.59 (A key point in this connection is that a recurrent pattern may be) 135.65 571.48 P
2 F
0.59 (discontinuous) 452.09 571.48 P
1 F
0.59 ( in) 518.73 571.48 P
0.31 (the sense that) 99.65 557.48 P
2 F
0.31 (the symbols in the pattern are not necessarily contiguous as they appear in) 168.19 557.48 P
3.15 (any or all of its occurrences) 99.65 543.48 P
1 F
3.15 (. In other words, a recurrent pattern may appear as a) 250.33 543.48 P
0.02 (subsequence within larger patterns.) 99.65 529.48 P
1 10 Q
0.02 (12) 268.92 534.29 P
1 12 Q
0.02 ( Thus, for example, a sequence of symbols like \324A B) 278.92 529.48 P
-0.12 (C D E F\325 may be recognised as a recurrent pattern within a set of instances which includes) 99.65 515.48 P
-0 (patterns like \324) 99.65 501.49 P
4 F
-0.01 (P A B Q C R D E F S) 165.6 501.49 P
1 F
-0 (\325, \324) 302.27 501.49 P
4 F
-0.01 (A L B C D M N E F O P) 316.25 501.49 P
1 F
-0 (\325, \324) 467.31 501.49 P
4 F
-0.01 (X A B C) 481.29 501.49 P
(D Y E F Z) 99.65 487.49 T
1 F
(\325 and so on.) 164.41 487.49 T
2.63 (In Section 4.2, below, I shall give an informal explanation of the method of) 135.65 471.49 P
0.12 (calculating the IC associated with any alignment. The details of the method of calculation) 99.65 457.49 P
(are presented in Section 5.) 99.65 443.49 T
0 F
(4.1) 99.65 409.49 T
(Lossless and lossy compression) 135.65 409.49 T
1 F
-0.67 (In connection with any kind of IC, it is important to distinguish \324lossless\325 compression from) 99.65 385.49 P
2.81 (\324lossy\325 compression. The former means IC where redundant information is the only) 99.65 371.49 P
0.44 (information which is removed. The latter means IC where redundant information may be) 99.65 357.49 P
-0.38 (and normally is removed but, in addition, there is loss of non-redundant information. In the) 99.65 343.49 P
0.05 (first case, the original information may be re-created with complete fidelity. In the second) 99.65 329.49 P
(case, the original information can never be perfectly restored.) 99.65 315.49 T
0.49 (Currently, this research programme assumes that lossless compression is the rule.) 135.65 299.49 P
-0.02 (In future research on the ICMAUS concepts, there may be a case for exploring the several) 99.65 285.49 P
(ways in which lossy compression may be used.) 99.65 271.49 T
0 F
(4.2) 99.65 237.49 T
(Evaluation of an alignment in terms of IC: informal description) 135.65 237.49 T
1 F
0.96 (The following subsections describe informally how the IC associated with an alignment) 99.65 213.49 P
99.65 184.95 531.65 199.93 C
99.65 184.95 531.65 199.93 R
7 X
0 K
V
108.65 197.91 252.65 197.91 2 L
V
0.5 H
2 Z
0 X
N
-8.35 24.95 603.65 816.95 C
1 12 Q
0 X
0 K
1.48 (11. Although standard methods of data compression appear, superficially, to use codes) 99.65 176.95 P
0.83 (without termination markers, there is an unavoidable need for information about the left) 99.65 162.95 P
2.02 (and right boundaries of each coded pattern and this information is normally provided) 99.65 148.95 P
0.03 (implicitly by the use of space characters or end-of-line characters or something similar, or) 99.65 134.95 P
(by the use of a trie for the recognition of variable-length codes.) 99.65 120.95 T
(12. The terms) 99.65 104.95 T
2 F
(subsequence) 168.93 104.95 T
1 F
( and) 229.55 104.95 T
2 F
(substring) 252.86 104.95 T
1 F
( are defined in Appendix A1.) 297.52 104.95 T
FMENDPAGE
%%EndPage: "14" 15
%%Page: "15" 15
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 15 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
0.1 (may be calculated when the alignment may be interpreted as a parsing of New in terms of) 99.65 736.95 P
(Old. The alignment in Figure 4 will be our example.) 99.65 722.95 T
-0.41 (The IC associated with any alignment is not realised until New has been encoded in) 135.65 706.95 P
1.54 (terms of the patterns from Old which appear in the alignment. Nevertheless, it will be) 99.65 692.95 P
0.66 (convenient at times in this article and the ones that follow, to say that a given alignment) 99.65 678.95 P
2.19 (encodes New or compresses New. Wherever this form of words is used it should be) 99.65 664.95 P
-0.53 (understood to mean that a code which expresses New in a compressed form may be derived) 99.65 650.95 P
(from the alignments under consideration.) 99.65 636.95 T
0 F
(4.3) 99.65 602.95 T
(Encoding individual symbols) 135.65 602.95 T
1 F
-0.51 (Regarding individual symbols in the sentence and the grammar, the simplest way to) 135.65 578.95 P
0.72 (encode them is with a \324block\325 code using a fixed number of bits for each symbol. In the) 99.65 564.95 P
0.46 (grammar in Figure 3, there are 20 symbol types so the minimum number of bits required) 99.65 550.95 P
(for each symbol is) 99.65 536.95 T
5 F
(\351) 190.92 536.95 T
1 F
(log) 195.52 536.95 T
1 10 Q
(2) 210.85 533.95 T
1 12 Q
( 2) 215.85 536.95 T
5 F
(0\371) 224.84 536.95 T
1 F
( = 5 bits per symbol.) 235.45 536.95 T
3.08 (In fact, the SP61 model uses variable-length codes for symbols, assigned in) 135.65 520.95 P
0.5 (accordance with a modified version of the Shannon-Fano-Elias \050S-F-E\051 coding scheme) 99.65 506.95 P
1 10 Q
0.41 (13) 521.65 511.75 P
1 12 Q
(so that the shortest codes represent the most frequent symbols and) 99.65 492.95 T
2 F
(vice versa) 419.76 492.95 T
1 F
(.) 468.05 492.95 T
1 10 Q
(14) 471.05 497.75 T
1 12 Q
0.3 (Notice that the number of bits required for each symbol is entirely independent of) 135.65 476.95 P
-0.02 (the number of characters in the name of the symbol as it is shown in the examples. Names) 99.65 462.95 P
(of symbols are chosen purely for their mnemonic value and to aid comprehension.) 99.65 448.95 T
0.76 (There are many variations and refinements that may be made at this level but, in) 135.65 432.95 P
-0.38 (general, the choice of coding system for individual symbols is not critical for the principles) 99.65 418.95 P
0.26 (to be described below where the focus of interest is the exploitation of redundancy which) 99.65 404.95 P
2.51 (may be attributed to) 99.65 390.95 P
2 F
2.51 (sequences) 208.96 390.95 P
1 F
2.51 ( of two or more symbols rather than any redundancy) 257.58 390.95 P
(attributed to unbalanced frequencies of individual symbols.) 99.65 376.95 T
-0 (For reasons which are given in Section 5 \050and Section 6 of [42]\051, the code for each) 135.65 360.95 P
-0.55 (symbol has two different values \050in bits\051: a) 99.65 346.95 P
2 F
-0.55 (minimum cost) 303.76 346.95 P
1 F
-0.55 ( which is the theoretical minimum) 370.17 346.95 P
3.36 (number of bits needed to represent that symbol according to the \050modified\051 S-F-E) 99.65 332.95 P
0.71 (calculations, and an) 99.65 318.95 P
2 F
0.71 (actual cost) 199.7 318.95 P
1 F
0.71 ( which is larger by a constant) 252.71 318.95 P
2 F
0.71 (cost factor) 401.23 318.95 P
1 F
0.71 (, whose value is) 452.91 318.95 P
(typically about 10 or 20.) 99.65 304.95 T
1.59 (In the following informal description of the encoding principles, the distinction) 135.65 288.95 P
-0.07 (between the minimum cost and the actual cost of each symbol is not important and will be) 99.65 274.95 P
0.04 (ignored. For the sake of simplicity in this presentation, it will be assumed that all symbols) 99.65 260.95 P
1.89 (are encoded with the same number of bits so that \324one symbol\325 can be treated as the) 99.65 246.95 P
(minimum unit of information.) 99.65 232.95 T
99.65 170.95 531.65 185.93 C
99.65 170.95 531.65 185.93 R
7 X
0 K
V
108.65 183.91 252.65 183.91 2 L
V
0.5 H
2 Z
0 X
N
-8.35 24.95 603.65 816.95 C
1 12 Q
0 X
0 K
(13. The coding scheme \050without the present modification\051 is described by [8].) 99.65 162.95 T
-0.38 (14. The Huffman coding scheme, described in [8] and also in [34], is better known than the) 99.65 146.95 P
0.83 (S-F-E scheme and is a little more efficient in terms of IC. But the S-F-E scheme allows) 99.65 132.95 P
-0.22 (probabilities to be inferred more exactly from the sizes of codes assigned by the scheme as) 99.65 118.95 P
(will be described in Section 7.) 99.65 104.95 T
FMENDPAGE
%%EndPage: "15" 16
%%Page: "16" 16
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 16 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 F
0 X
(4.4) 99.65 736.95 T
(Encoding words) 135.65 736.95 T
1 F
1.06 (A word in the sentence to be parsed may be encoded using some shorter code which is) 99.65 712.95 P
0.56 (associated with the word in the dictionary and which identifies the word uniquely within) 99.65 698.95 P
1.27 (the dictionary. In this case, the grammar in Figure 3 plays the part of the dictionary of) 99.65 684.95 P
(segments in a standard compression system.) 99.65 670.95 T
-0.08 (In accordance with the remarks above, the code pattern must show where the word) 135.65 654.95 P
-0.52 (starts and also where it finishes. In some cases, this can be achieved by using letter symbols) 99.65 640.95 P
0.51 (from the word itself. Thus \324) 99.65 626.95 P
4 F
1.22 (g l) 234.76 626.95 P
1 F
0.51 (\325 could be used as an abbreviation for \324) 257.57 626.95 P
4 F
1.22 (g i r l) 448.81 626.95 P
1 F
0.51 (\325 very) 502.83 626.95 P
-0.07 (much in the same way as we use abbreviations like \324B\325ham\325 for \324Birmingham\325 in ordinary) 99.65 612.95 P
0.34 (writing. However, because of the haphazard nature of such markers, there are advantages) 99.65 598.95 P
-0.27 (if we use code symbols introduced especially for the purpose at the beginnings and ends of) 99.65 584.95 P
(words.) 99.65 570.95 T
0.69 (In the light of these remarks, it should be clear that the word \324) 135.65 554.95 P
4 F
1.64 (j o h n) 441 554.95 P
1 F
0.69 (\325 in the) 496.3 554.95 P
0.12 (grammar shown in Figure 3 may be encoded as \324) 99.65 540.95 P
4 F
0.28 (N 0 #N) 334.86 540.95 P
1 F
0.12 (\325, the word \324) 378.6 540.95 P
4 F
0.28 (r u n s) 438.23 540.95 P
1 F
0.12 (\325 may be) 489.45 540.95 P
-0.51 (encoded as \324) 99.65 526.95 P
4 F
-1.22 (V 1 #V) 158.58 526.95 P
1 F
-0.51 (\325 and likewise for the other words. In all cases there is a modest saving) 199.32 526.95 P
-0.62 (of one or two symbols for each word. If the grammar contained words with fewer than three) 99.65 512.95 P
(letters, then the \324saving\325 in those cases would be negative!) 99.65 498.95 T
0 F
(4.5) 99.65 464.95 T
(Encoding the sentence) 135.65 464.95 T
1 F
-0.25 (Given the two words in their encoded forms \050\324) 99.65 440.95 P
4 F
-0.6 (N 0 #N) 320.14 440.95 P
1 F
-0.25 (\325 for \324) 362.11 440.95 P
4 F
-0.6 (j o h n) 389.58 440.95 P
1 F
-0.25 (\325 and \324) 438.15 440.95 P
4 F
-0.6 (V 1 #V) 468.95 440.95 P
1 F
-0.25 (\325 for) 510.92 440.95 P
0.13 (\324) 99.65 426.95 P
4 F
0.32 (r u n s) 103.64 426.95 P
1 F
0.13 (\325\051, the whole sentence may be encoded as \324) 154.96 426.95 P
4 F
0.32 (N 0 #N V 1 #V) 362.18 426.95 P
1 F
0.13 (\325. This code for) 457.31 426.95 P
-0.26 (the sentence achieves a small amount of compression because the code contains 2 symbols) 99.65 412.95 P
(less than the number of symbols in the original sentence.) 99.65 398.95 T
-0.36 (However, this sequence contains the subsequence \324) 135.65 382.95 P
4 F
-0.86 (N #N V #V) 378.96 382.95 P
1 F
-0.36 (\325 and this sequence) 441.13 382.95 P
0.31 (is a substring within the \324sentence\325 pattern \324) 99.65 368.95 P
4 F
0.75 (S N #N V #V #S) 312.02 368.95 P
1 F
0.31 (\325 - which appears as the) 416.52 368.95 P
-0.16 (first pattern in the grammar. So we may replace the sequence \324) 99.65 354.95 P
4 F
-0.39 (N #N V #V) 398.28 354.95 P
1 F
-0.16 (\325 by the \324code\325) 461.87 354.95 P
0.08 (sequence \324) 99.65 340.95 P
4 F
0.19 (S #S) 150.68 340.95 P
1 F
0.08 (\325. To discriminate the words in this sentence we must add the symbols \324) 179.65 340.95 P
4 F
0.19 (0) 524.45 340.95 P
-0.47 (1) 99.65 326.95 P
1 F
-0.19 (\325 from the sequence \324) 106.84 326.95 P
4 F
-0.47 (N 0 #N V 1 #V) 207.98 326.95 P
1 F
-0.19 (\325. The overall result is an encoded representation) 299.2 326.95 P
(of the sentence as:) 99.65 312.95 T
4 F
(S 0 1 #S) 135.65 296.95 T
1 F
(.) 193.22 296.95 T
0.05 (The 4 symbols in this encoding of the sentence represents a useful compression compared) 99.65 280.95 P
(with the 8 symbols in the unencoded sentence.) 99.65 266.95 T
0 F
(4.6) 99.65 232.95 T
(Encoding patterns with arbitrarily many levels of parts and subparts) 135.65 232.95 T
1 F
0.2 (The simple example we are considering contains only one main part \050the whole sentence\051) 99.65 208.95 P
1.26 (and two levels of subparts \050the words within the sentence and the symbols within each) 99.65 194.95 P
1.48 (word\051. It should be clear that the principles which have been described can be applied) 99.65 180.95 P
(through arbitrarily many levels in a part-whole hierarchy. Examples are presented in [42].) 99.65 166.95 T
0 F
(4.7) 99.65 132.95 T
(Discussion) 135.65 132.95 T
1 F
-0.2 (Each pattern expresses sequential redundancy in the data to be encoded and this sequential) 99.65 108.95 P
FMENDPAGE
%%EndPage: "16" 17
%%Page: "17" 17
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 17 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
-0.4 (redundancy can be exploited to reduce the number of symbols which need to be written out) 99.65 736.95 P
-0.5 (explicitly. In the grammar shown in Figure 3, each pattern for an individual word expresses) 99.65 722.95 P
2.38 (the sequential redundancy of the letters within that word. The pattern for a sentence) 99.65 708.95 P
(expresses the sequential redundancy of the pattern: \324noun\325 followed by \324verb\325.) 99.65 694.95 T
-0.05 (Since this principle operates at all levels in the \324hierarchy\325 of patterns, many of the) 135.65 678.95 P
0.77 (symbols at intermediate levels may be omitted completely. A sentence may be specified) 99.65 664.95 P
0.36 (with symbols marking the start and end of the sentence pattern together with interpolated) 99.65 650.95 P
(symbols which discriminate amongst alternatives at lower levels.) 99.65 636.95 T
-0.36 (The method which has been described illustrates the role of context in the encoding) 135.65 620.95 P
-0.14 (of information. Any one symbol like \324) 99.65 606.95 P
6 F
-0.16 (0) 281.69 606.95 P
1 F
-0.14 (\325 or \324) 288.36 606.95 P
6 F
-0.16 (1) 312.05 606.95 P
1 F
-0.14 (\325 is ambiguous in terms of the patterns in the) 318.72 606.95 P
-0.59 (grammar in Figure 3. But in the context of the pattern \324) 99.65 592.95 P
6 F
-0.66 (S 0 1 S) 357.6 592.95 P
1 F
-0.59 (\325, it is possible to assign each) 394.96 592.95 P
1.88 (instance of \324) 99.65 578.95 P
6 F
2.1 (0) 162.7 578.95 P
1 F
1.88 (\325 or \324) 169.37 578.95 P
6 F
2.1 (1) 197.11 578.95 P
1 F
1.88 (\325 unambiguously to one of the words in the grammar, giving the) 203.78 578.95 P
(sequence of words in the original sentence.) 99.65 564.95 T
0 F
(4.8) 99.65 530.95 T
(\324Gaps\325 and \324learning\325) 135.65 530.95 T
1 F
0.1 (Consider the simple alignments shown in Figure 5. In \050a\051 there is a gap in the sequence of) 99.65 506.95 P
-0.6 (hits in the pattern from Old while in \050b\051 the gap in the sequence of hits is in the New pattern.) 99.65 492.95 P
(In alignment \050c\051, there is a gap in both New and Old.) 99.65 478.95 T
1.15 (These three alignments differ from the one shown in Figure 4 because, in every) 135.65 462.95 P
-0.25 (case, assuming lossless compression, the pattern from New cannot be encoded precisely in) 99.65 448.95 P
0.31 (terms of the pattern from Old which appears in the alignment. In broad terms, there seem) 99.65 434.95 P
(to be two kinds of solution to this problem.) 99.65 420.95 T
(--------------------------------------------------------------------) 135.65 404.95 T
(Figure 5 about here) 171.65 388.95 T
(--------------------------------------------------------------------) 135.65 372.95 T
0 F
(Figure 5) 135.65 348.95 T
1 F
(Some examples of \324gaps\325 in alignments.) 135.65 332.95 T
0 F
(4.8.1) 99.65 298.95 T
(Taking account of gaps by \324learning\325) 135.65 298.95 T
1 F
-0.45 (In future versions of the SP model, it is anticipated that the existence of \324gaps\325 like the ones) 99.65 274.95 P
-0.6 (just shown will be accommodated by creating new patterns and codes and by modifying the) 99.65 260.95 P
(patterns in Old. This may be regarded as a form of unsupervised \324learning\325.) 99.65 246.95 T
1.4 (For example, in Figure 5 \050a\051, the pattern from Old may be modified to become) 135.65 230.95 P
-0.05 (something like \324) 99.65 216.95 P
4 F
-0.12 (t h e A #A t a b l e) 177.51 216.95 P
1 F
-0.05 (\325 and a new pattern, something like \324) 320.37 216.95 P
4 F
-0.12 (A 0 b) 495.9 216.95 P
0.82 (i g #A) 99.65 202.95 P
1 F
0.34 (\325, will be added to Old as a separate entity. Similar changes may be made in the) 144.46 202.95 P
0.98 (case of alignment \050b\051. For the alignment in Figure 5 \050c\051, the pattern from Old would be) 99.65 188.95 P
-0.22 (changed to something like \324) 99.65 174.95 P
4 F
-0.53 (t h e A #A t a b l e) 232.01 174.95 P
1 F
-0.22 (\325, as before, and two new patterns) 371.12 174.95 P
-0.62 (- something like \324) 99.65 160.95 P
4 F
-1.5 (A 0 b i g #A) 182.72 160.95 P
1 F
-0.62 (\325 and \324) 261.57 160.95 P
4 F
-1.5 (A 1 w o o d e n #A) 291.63 160.95 P
1 F
-0.62 (\325 - would be added to Old.) 409.15 160.95 P
0 F
(4.8.2) 99.65 126.95 T
(An alternative \324stop-gap\325 method for taking account of gaps) 135.65 126.95 T
1 F
1.14 (Pending the introduction of \324learning\325 to the SP model, the SP61 model uses an) 135.65 102.95 P
FMENDPAGE
%%EndPage: "17" 18
%%Page: "18" 18
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 18 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
-0.51 (alternative method for taking account of gaps when compression differences are computed.) 99.65 736.95 P
2.18 (The method exploits a dice rolling analogy together with some \324standard\325 probability) 99.65 722.95 P
-0.33 (theory presented in [23]. There is insufficient space in this article to describe the method in) 99.65 708.95 P
-0.14 (detail. A brief outline of the method and how it is applied is presented in Section 5, below.) 99.65 694.95 P
0 F
(4.8.3) 99.65 660.95 T
(Gaps and compression scores) 135.65 660.95 T
1 F
1.75 (In general, our intuitions tell us that an alignment with few gaps and/or small ones is) 99.65 636.95 P
(\324better\325 than an alignment with many gaps and/or large ones.) 99.65 622.95 T
-0.16 (It would take us too far afield to discuss these issues in detail but it seems that both) 135.65 606.95 P
-0.23 (of the methods for taking account of gaps which have been outlined will tend to reduce the) 99.65 592.95 P
(sizes of compression differences, broadly in line with our intuitions:) 99.65 578.95 T
(\245) 99.65 562.95 T
-0.29 (In the case of \324learning\325, every gap requires the creation of new codes and these are) 135.65 562.95 P
0.92 (added to the \324cost\325 of encoding. So it is reasonably clear that, other things being) 135.65 548.95 P
-0.17 (equal, an alignment with many gaps will have a lower compression difference than) 135.65 534.95 P
2.33 (one with few gaps. It is less clear why long gaps should reduce compression) 135.65 520.95 P
-0.47 (differences more than small gaps. A possible reason is that, with long gaps, it is less) 135.65 506.95 P
0.25 (easy to find the \324correct\325 alignment and there are many \324wrong\325 alignments which) 135.65 492.95 P
(fragment one or more of the long gaps into a larger number of smaller ones.) 135.65 478.95 T
(\245) 99.65 462.95 T
-0.59 (In the case of the method used in SP61, the dice-rolling analogy on which it is based) 135.65 462.95 P
0.35 (has a clear implication that many gaps and long ones will reduce the compression) 135.65 448.95 P
(difference.) 135.65 434.95 T
0 F
(4.8.4) 99.65 400.95 T
(\324Invalid\325 alignments) 135.65 400.95 T
1 F
0.45 (All of the foregoing remarks about gaps assumes that the gaps occur in matches between) 99.65 376.95 P
0.45 (New patterns and Old patterns. However, there can also be gaps in matches between two) 99.65 362.95 P
2.02 (patterns both of which are from Old. Amongst the latter kind of alignment, there are) 99.65 348.95 P
3.18 (alignments containing \324mis-matches\325 between two patterns from Old, as in the two) 99.65 334.95 P
(alignments shown here:) 99.65 320.95 T
2.22 (In each of these two examples, there is ambiguity about the relative left-right) 135.65 230.95 P
0.68 (positions of \324) 99.65 216.95 P
4 F
1.62 (x) 164.3 216.95 P
1 F
0.68 (\325 and \324) 171.5 216.95 P
4 F
1.62 (y) 204.16 216.95 P
1 F
0.68 (\325 which means that it is not possible to unify the alignment into a) 211.35 216.95 P
0.51 (single sequence without making arbitrary assumptions about whether \324) 99.65 202.95 P
4 F
1.22 (x) 442.81 202.95 P
1 F
0.51 (\325 precedes \324) 450.01 202.95 P
4 F
1.22 (y) 506.96 202.95 P
1 F
0.51 (\325 or) 514.16 202.95 P
2 F
(vice versa.) 99.65 188.95 T
1 F
-0.2 (In some future \324learning\325 version of the SP model, it may be possible to handle this) 135.65 172.95 P
0.75 (kind of mis-match in the same kind of way as was discussed in Section 4.3.1. However,) 99.65 158.95 P
-0.72 (SP61 assumes that the patterns in Old are immutable. For this reason, it treats all alignments) 99.65 144.95 P
0.04 (like the two above as \324invalid\325. If that kind of mis-match is detected while an alignment is) 99.65 130.95 P
(being formed, the alignment discarded and thus excluded from any further processing.) 99.65 116.95 T
99.65 96.95 531.65 744.95 C
104.5 242.95 526.79 316.95 C
4 14 Q
0 X
0 K
(b) 175.11 276.32 T
(x) 155.91 276.32 T
(a) 136.72 253.73 T
(y) 155.91 253.73 T
(b) 175.11 253.73 T
(a) 136.72 276.32 T
4 12 Q
(or this) 199.92 275.07 T
4 14 Q
(x) 305.11 276.32 T
(b) 285.91 276.32 T
(a) 266.72 253.73 T
(b) 285.91 253.73 T
(y) 305.11 253.73 T
(a) 266.72 276.32 T
(a) 136.72 300.22 T
(b) 175.11 300.22 T
(a) 266.72 300.22 T
(b) 285.91 300.22 T
287.72 295.22 287.72 286.22 2 L
0.5 H
2 Z
N
269.72 295.22 269.72 286.22 2 L
N
178.72 295.22 178.72 286.22 2 L
N
139.72 295.22 139.72 286.22 2 L
N
288.72 272.72 288.72 263.72 2 L
N
270.72 272.72 270.72 263.72 2 L
N
179.72 272.72 179.72 263.72 2 L
N
140.72 272.72 140.72 263.72 2 L
N
99.65 96.95 531.65 744.95 C
-8.35 24.95 603.65 816.95 C
FMENDPAGE
%%EndPage: "18" 19
%%Page: "19" 19
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 19 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 F
0 X
(4.9) 99.65 736.95 T
(Decoding by compression) 135.65 736.95 T
1 F
0.12 (Although it is not particularly relevant to PrbRs, we may note in passing that, paradoxical) 99.65 712.95 P
1 (as it may seem, the process of \324decoding\325 an encoded version of a sentence so that it is) 99.65 698.95 P
0.05 (expanded to recreate the original sentence may be achieved by) 99.65 684.95 P
2 F
0.05 (exactly) 403.26 684.95 P
1 F
0.05 ( the same processes) 437.23 684.95 P
1.93 (of ICMAUS as is used to achieve the original compression. How this can be done is) 99.65 670.95 P
(explained in Section 6 of [42].) 99.65 656.95 T
0 F
(5) 99.65 622.95 T
(Calculating the compression associated with an alignment) 135.65 622.95 T
1 F
0.36 (The method of calculating the) 99.65 598.95 P
2 F
0.36 (compression difference) 248.35 598.95 P
1 F
0.36 ( \050CD\051 associated with an alignment) 360.29 598.95 P
-0.64 (of patterns which was described informally in the last section is summarised in more formal) 99.65 584.95 P
(terms here. This is the method embodied in the SP61 model, outlined in Section 6.) 99.65 570.95 T
2.05 (The method is designed to calculate the compression of New information \050the) 135.65 554.95 P
0.55 (sentence to be parsed\051 which may be achieved by encoding New information in terms of) 99.65 540.95 P
2.9 (Old information \050where Old information is the patterns of symbols representing the) 99.65 526.95 P
(grammar used in parsing\051. This CD is calculated as:) 99.65 512.95 T
(CD = B) 135.65 496.95 T
1 10 Q
(N) 173.07 493.95 T
1 12 Q
( - B) 180.28 496.95 T
1 10 Q
(E) 198.27 493.95 T
1 12 Q
0.14 (where B) 99.65 480.95 P
1 10 Q
0.12 (N) 140.09 477.95 P
1 12 Q
0.14 ( is the number of bits required to represent in \324raw\325 form \050without any encoding) 147.3 480.95 P
1.28 (except modified S-F-E coding at the level of single symbols\051 those symbols from New) 99.65 466.95 P
-0.26 (which form hits with symbols from Old in the alignment. B) 99.65 452.95 P
1 10 Q
-0.22 (E) 382.54 449.95 P
1 12 Q
-0.26 ( is the number of bits required) 388.65 452.95 P
-0.14 (for the encoding of that New information in terms of Old information.) 99.65 438.95 P
1 10 Q
-0.12 (15) 434.49 443.75 P
1 12 Q
-0.14 ( How these values) 444.48 438.95 P
(are calculated is described below.) 99.65 424.95 T
0 F
(5.1) 99.65 390.95 T
(Information costs of) 135.65 390.95 T
(symbols) 241.91 390.95 T
1 F
(If a simple block code is used for symbols, then the) 99.65 366.95 T
2 F
(minimum cost) 349.48 366.95 T
1 F
(, M, for each symbol is) 416.44 366.95 T
(M = log) 135.65 350.95 T
1 10 Q
(2) 174.4 347.95 T
5 12 Q
(|) 179.4 350.95 T
1 F
(A) 181.79 350.95 T
5 F
(|) 190.45 350.95 T
1 F
1.26 (bits where) 99.65 334.95 P
5 F
1.26 (|) 154.79 334.95 P
1 F
1.26 (A| is the number of symbol types in the alphabet of symbol types \050A\051 used) 157.19 334.95 P
1.64 (throughout New and Old. If we know that a binary alphabet is to be used, M may be) 99.65 320.95 P
(rounded up to the nearest integer, thus: M =) 99.65 306.95 T
5 F
(\351) 312.94 306.95 T
1 F
(log) 317.54 306.95 T
1 10 Q
(2) 332.87 303.95 T
5 12 Q
(|) 337.87 306.95 T
1 F
(A) 340.27 306.95 T
5 F
(|\371) 348.93 306.95 T
1 F
( bits.) 355.93 306.95 T
0 F
(5.1.1) 99.65 272.95 T
(Variable-length codes) 135.65 272.95 T
1 F
0.94 (If there are variations in the frequencies of symbols, as there normally are, we can save) 99.65 248.95 P
0.77 (space by using variable-length codes for each symbol such as one might construct using) 99.65 234.95 P
(Huffman coding \050see [8]\051.) 99.65 220.95 T
99.65 196.95 531.65 211.93 C
99.65 196.95 531.65 211.93 R
7 X
0 K
V
108.65 209.91 252.65 209.91 2 L
V
0.5 H
2 Z
0 X
N
-8.35 24.95 603.65 816.95 C
1 12 Q
0 X
0 K
-0.19 (15. An obvious alternative to CD as a measure of compression is \324compression ratio\325 \050CR\051) 99.65 188.95 P
0.54 (calculated as CR = B) 99.65 174.95 P
1 10 Q
0.45 (E) 203.18 171.95 P
1 12 Q
0.54 ( / B) 209.28 174.95 P
1 10 Q
0.45 (N) 227.7 171.95 P
1 12 Q
0.54 (. For some purposes, CR may be preferable to CD because it) 234.91 174.95 P
0.1 (expresses the \324efficiency\325 of compression in a manner which is largely independent of the) 99.65 160.95 P
0.82 (absolute sizes of B) 99.65 146.95 P
1 10 Q
0.69 (E) 192.39 143.95 P
1 12 Q
0.82 ( or B) 198.49 146.95 P
1 10 Q
0.69 (N) 224.12 143.95 P
1 12 Q
0.82 (. But, for the purpose of guiding a search for MAs which are) 231.34 146.95 P
0.25 (\324good\325 in terms of the compression of New in terms of Old, CR has the disadvantage that) 99.65 132.95 P
-0.6 (it can give spurious prominence to alignments which encode a small part of New efficiently) 99.65 118.95 P
(but which neglect the major part of New. The CD measure does not have this bias.) 99.65 104.95 T
FMENDPAGE
%%EndPage: "19" 20
%%Page: "20" 20
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 20 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
-0.27 (Although Huffman codes are well-known and efficient, they have the disadvantage) 135.65 736.95 P
1.9 (for present purposes that they can lead to inaccuracies when codes are translated into) 99.65 722.95 P
-0.4 (probabilities \050considered in [39]\051. Codes which are constructed by the Shannon-Fano-Elias) 99.65 708.95 P
0.97 (\050S-F-E\051 method \050see [8]\051 are slightly less efficient but avoid the inaccuracies which can) 99.65 694.95 P
0.91 (arise if Huffman codes are translated into probabilities. However, for reasons connected) 99.65 680.95 P
0.43 (with the practicalities of coding and decoding in a binary environment, the lengths of the) 99.65 666.95 P
-0.46 (codes produced by the S-F-E method are, themselves, slightly too long. For that reason, the) 99.65 652.95 P
1.24 (SP61 model \050Section 6\051 calculates the size of the code for each symbol type using this) 99.65 638.95 P
(formula:) 99.65 624.95 T
(M) 135.65 608.95 T
2 10 Q
(i) 146.31 605.95 T
1 12 Q
( = log) 149.09 608.95 T
1 10 Q
(2) 177.18 605.95 T
1 12 Q
(\0501 /) 182.17 608.95 T
2 F
(p) 201.5 608.95 T
2 10 Q
(i) 207.49 605.95 T
1 12 Q
(\051) 210.27 608.95 T
0.81 (where M) 99.65 592.95 P
2 10 Q
0.68 (i) 143.42 589.95 P
1 12 Q
0.81 ( is the minimum cost of the) 146.2 592.95 P
2 F
0.81 (i) 286.17 592.95 P
1 F
0.81 (th symbol type and) 289.51 592.95 P
2 F
0.81 (p) 387.39 592.95 P
2 10 Q
0.68 (i) 393.38 589.95 P
1 12 Q
0.81 ( is the probability of the) 396.16 592.95 P
2 F
0.81 (i) 518.98 592.95 P
1 F
0.81 (th) 522.32 592.95 P
-0.05 (symbol type. This formula gives a value which is slightly smaller than the size of the code) 99.65 578.95 P
0.75 (which would be produced by the S-F-E method \050M) 99.65 564.95 P
2 10 Q
0.62 (i) 350.81 561.95 P
1 12 Q
0.75 ( =) 353.58 564.95 P
5 F
0.75 (\351) 367.84 564.95 P
1 F
0.75 (log) 372.45 564.95 P
1 10 Q
0.62 (2) 387.78 561.95 P
1 12 Q
0.75 (\0501 /) 392.77 564.95 P
2 F
0.75 (p) 413.59 564.95 P
2 10 Q
0.62 (i) 419.59 561.95 P
1 12 Q
0.75 (\051) 422.37 564.95 P
5 F
0.75 (\371) 426.36 564.95 P
1 F
0.75 ( + 1\051, partly because) 430.97 564.95 P
-0.23 (there is no rounding up as there is in the S-F-E method. Rounding up is not used because it) 99.65 550.95 P
(assumes the use of a binary alphabet, an assumption which need not be true.) 99.65 536.95 T
0.56 (Although the formula is not totally accurate as a measure of minimum code sizes) 135.65 520.95 P
1.22 (that one could use in practice in a binary environment, it has the advantage for present) 99.65 506.95 P
-0.05 (purposes that it allows probabilities of patterns, symbols and alignments to be derived in a) 99.65 492.95 P
-0.17 (straightforward manner from the minimum cost of the symbols used to encode a pattern or) 99.65 478.95 P
(an alignment \050as described in [39]\051.) 99.65 464.95 T
0 F
(5.1.2) 99.65 430.95 T
(Probabilities and frequencies) 135.65 430.95 T
1 F
(For the calculation of M) 99.65 406.95 T
2 10 Q
(i) 216.24 403.95 T
1 12 Q
(, above,) 219.01 406.95 T
2 F
(p) 259.65 406.95 T
2 10 Q
(i) 265.64 403.95 T
1 12 Q
( is calculated as:) 268.42 406.95 T
2 F
(p) 135.65 390.95 T
2 10 Q
(i) 141.65 387.95 T
1 12 Q
( =) 144.42 390.95 T
2 F
(f) 157.18 390.95 T
2 10 Q
(i) 160.52 387.95 T
1 12 Q
( / F) 163.3 390.95 T
1.11 (where F is the total frequency of the) 99.65 374.95 P
5 F
1.11 (|) 285.09 374.95 P
1 F
1.11 (A| symbol types and) 287.48 374.95 P
2 F
1.11 (f) 392.95 374.95 P
2 10 Q
0.93 (i) 396.28 371.95 P
1 12 Q
1.11 ( is the frequency of the) 399.06 374.95 P
2 F
1.11 (i) 518.98 374.95 P
1 F
1.11 (th) 522.32 374.95 P
(symbol type.) 99.65 360.95 T
0.83 (The value of) 135.65 344.95 P
2 F
0.83 (f) 201.76 344.95 P
2 10 Q
0.69 (i) 205.1 341.95 P
1 12 Q
0.83 ( for each symbol type can be determined easily by counting if the) 207.87 344.95 P
-0.41 (\324raw\325 data is available from which a set of patterns was derived. If only a sample of the raw) 99.65 330.95 P
-0.36 (data is available, frequencies can be estimated, and should be acceptable for most purposes) 99.65 316.95 P
(providing that the sample is representative and reasonably large.) 99.65 302.95 T
(If only the derived patterns are available, values for) 135.65 286.95 T
2 F
(f) 386.11 286.95 T
2 10 Q
(i) 389.44 283.95 T
1 12 Q
( can be calculated as:) 392.22 286.95 T
1.15 (where) 99.65 223.95 P
2 F
1.15 (f) 133.09 223.95 P
2 10 Q
0.95 (j) 136.43 220.95 P
1 12 Q
1.15 ( is the \050notional\051 frequency of the) 139.21 223.95 P
2 F
1.15 (j) 310.77 223.95 P
1 F
1.15 (th pattern in the grammar \050illustrated by the) 314.1 223.95 P
0.28 (numbers on the right of Figure 3\051,) 99.65 209.95 P
2 F
0.28 ( o) 264.25 209.95 P
2 10 Q
0.24 (j) 273.53 206.95 P
1 12 Q
0.28 ( is the number of occurrences of the given symbol in) 276.31 209.95 P
(the) 99.65 195.95 T
2 F
(j) 117.3 195.95 T
1 F
(th pattern and P is the number of patterns in the grammar.) 120.64 195.95 T
-0.11 (The formula just given looks innocent enough but it can yield frequency values for) 135.65 179.95 P
2.99 (symbols which are too high. Consider, for example, a small \324world\325 containing the) 99.65 165.95 P
-0.21 (sequence of symbols \324) 99.65 151.95 P
4 F
-0.5 (A B C) 205.95 151.95 P
1 F
-0.21 (\325. It is possible to represent this \324world\325 with two patterns: \324) 240.93 151.95 P
4 F
-0.5 (A) 524.45 151.95 P
-0.63 (B) 99.65 137.95 P
1 F
-0.26 (\325 and \324) 106.84 137.95 P
4 F
-0.63 (B C) 137.62 137.95 P
1 F
-0.26 (\325, each with a frequency of 1. If the formula, above, is applied to this database,) 158.57 137.95 P
(the frequency of \324) 99.65 123.95 T
4 F
(B) 185.23 123.95 T
1 F
(\325 is calculated as 2, not 1 as it should be.) 192.43 123.95 T
1.35 (Although it is implausible in this example to use two patterns instead of one to) 135.65 107.95 P
99.65 96.95 531.65 744.95 C
100 235.95 531.29 282.95 C
2 12 Q
0 X
0 K
(f) 135.29 254.95 T
2 10 Q
(i) 138.63 251.95 T
1 12 Q
( =) 141.41 254.95 T
5 F
(\345 \050) 154.17 254.95 T
2 F
(f) 169.71 254.95 T
2 10 Q
(j) 173.04 251.95 T
5 12 Q
(\264) 178.82 254.95 T
2 F
(o) 188.4 254.95 T
2 10 Q
(j) 194.4 251.95 T
1 12 Q
(\051) 197.18 254.95 T
2 F
(j) 151.29 243.95 T
1 F
(=1) 154.63 243.95 T
(P) 153.29 265.95 T
99.65 96.95 531.65 744.95 C
-8.35 24.95 603.65 816.95 C
FMENDPAGE
%%EndPage: "20" 21
%%Page: "21" 21
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 21 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
0.47 (represent the \324raw\325 data, comparable situations are quite common in more realistic cases.) 99.65 736.95 P
2.29 (For example, the frequency of the grammatical symbol \324) 99.65 722.95 P
4 F
5.49 (N) 388.76 722.95 P
1 F
2.29 (\325 \050for noun\051 in the parsing) 395.96 722.95 P
0.41 (example presented in Section 2.1.1 is calculated as 2000 from the frequencies of patterns) 99.65 708.95 P
1.78 (shown in Figure 3 using the formula above. However, this overlooks the fact that the) 99.65 694.95 P
2.58 (frequencies shown in the figure are frequencies that would be obtained from \050good\051) 99.65 680.95 P
-0.11 (parsings of \324raw\325 data \050see Section 3\051. In these parsings, the \324) 99.65 666.95 P
4 F
-0.26 (N) 391.87 666.95 P
1 F
-0.11 (\325 symbol at the start of each) 399.07 666.95 P
-0.08 (word is unified with the corresponding \324) 99.65 652.95 P
4 F
-0.2 (N) 292.35 652.95 P
1 F
-0.08 (\325 symbol in the sentence pattern but it is counted) 299.55 652.95 P
-0.19 (twice, once in the word pattern and once in the sentence pattern. Thus the correct value for) 99.65 638.95 P
-0.24 (the frequency of \324) 99.65 624.95 P
4 F
-0.58 (N) 184.5 624.95 P
1 F
-0.24 (\325 in this example should be 1000 not 2000 as calculated by the formula.) 191.7 624.95 P
-0.05 (Although the formula can give frequency values for symbols which are too high in) 135.65 608.95 P
-0.2 (some cases, it has been assumed for the time being that the formula is sufficiently accurate) 99.65 594.95 P
(for most practical purposes.) 99.65 580.95 T
0 F
(5.1.3) 99.65 546.95 T
(\324Minimum\325 and \324actual\325 cost for each symbol) 135.65 546.95 T
1 F
0.48 (Given a value of M) 99.65 522.95 P
2 10 Q
0.4 (i) 194.82 519.95 P
1 12 Q
0.48 (, as described above, the minimum cost of each symbol is multiplied) 197.6 522.95 P
(by a) 99.65 508.95 T
2 F
(cost factor) 122.96 508.95 T
1 F
(,) 173.93 508.95 T
2 F
(c) 179.93 508.95 T
1 F
(, to obtain an) 185.26 508.95 T
2 F
(actual cost) 250.88 508.95 T
1 F
(, A) 303.19 508.95 T
2 10 Q
(i) 317.84 505.95 T
1 12 Q
(, thus:) 320.62 508.95 T
(A) 135.65 492.95 T
2 10 Q
(i) 144.31 489.95 T
1 12 Q
( = M) 147.09 492.95 T
2 10 Q
(i) 170.51 489.95 T
5 12 Q
(\264) 176.29 492.95 T
1 F
(c) 185.87 492.95 T
(c > 1.) 243.65 492.95 T
-0.41 (The cost factor must be bigger than 1 and is normally about 10 or 20. The purpose of actual) 99.65 476.95 P
(costs and appropriate values for the cost factor are discussed in Section 5.3, below.) 99.65 462.95 T
0 F
(5.2) 99.65 428.95 T
(Calculation of E, the minimum number of bits required for the encoding of a) 135.65 428.95 T
(given pattern in Old) 135.65 414.95 T
1 F
-0.11 (The calculation of B) 99.65 390.95 P
1 10 Q
-0.09 (E) 197.58 387.95 P
1 12 Q
-0.11 ( for any alignment requires a value for the) 203.68 390.95 P
2 F
-0.11 (encoding cost) 407.53 390.95 P
1 F
-0.11 (, E, for each) 473.7 390.95 P
(pattern from Old which appears in the alignment.) 99.65 376.95 T
2.75 (Since there is a frequency of occurrence associated with each pattern in any) 135.65 360.95 P
-0.3 (grammar, it is possible to calculate a minimum for the value of E for each pattern using the) 99.65 346.95 P
1.45 (S-F-E method or the formula used to calculate the minimum cost of each symbol type) 99.65 332.95 P
0.26 (\050Section 5.1\051. However, there is an alternative method of calculating E which, for present) 99.65 318.95 P
2.19 (purposes, appears to be more useful and which has been adopted in the SP61 model) 99.65 304.95 P
(described in Section 6.) 99.65 290.95 T
( In summary, the alternative method is to calculate E as) 135.65 274.95 T
-0.07 (where D) 99.65 219.95 P
2 10 Q
-0.06 (i) 140.53 216.95 P
1 12 Q
-0.07 ( is the M value for the) 143.31 219.95 P
2 F
-0.07 (i) 251.73 219.95 P
1 F
-0.07 (th symbol in a subsequence of) 255.06 219.95 P
2 F
-0.07 (n) 402.53 219.95 P
1 F
-0.07 ( \324discrimination\325 symbols) 408.52 219.95 P
0.51 (within the given pattern which identifies the pattern uniquely amongst the patterns in the) 99.65 205.95 P
(grammar without over-specifying the pattern.) 99.65 191.95 T
-0.52 (Ideally, the discrimination symbols for a pattern would be whatever subsequence of) 135.65 175.95 P
0.9 (the pattern was most distinctive of the pattern, regardless of the position of the symbols) 99.65 161.95 P
(within the pattern. However, in the SP61 model, two constraints have been imposed:) 99.65 147.95 T
(\245) 99.65 131.95 T
-0.13 (The simplifying assumption has been made that the discrimination symbols are the) 135.65 131.95 P
0.51 (smallest substring of one or more symbols starting at the beginning of the pattern) 135.65 117.95 P
1.05 (which enables the pattern to be identified uniquely within the grammar. For any) 135.65 103.95 P
99.65 96.95 531.65 744.95 C
100.65 231.95 530.65 270.95 C
1 12 Q
0 X
0 K
(E =) 138.65 249.95 T
5 F
(\345) 161.74 249.95 T
1 F
( D) 170.29 249.95 T
2 10 Q
(i) 181.94 246.95 T
2 12 Q
(n) 163.65 262.95 T
(i) 158.65 238.95 T
1 F
(=1) 161.98 238.95 T
99.65 96.95 531.65 744.95 C
-8.35 24.95 603.65 816.95 C
FMENDPAGE
%%EndPage: "21" 22
%%Page: "22" 22
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 22 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
1.8 (pattern, it is easy to discover what this substring is by a process of systematic) 135.65 736.95 P
-0.57 (comparison of candidate substrings with corresponding symbols in other patterns in) 135.65 722.95 P
(the grammar.) 135.65 708.95 T
0.69 (Although a constrained subsequence of symbols is used in calculating the) 171.65 692.95 P
2.04 (value of E for the pattern, this does not mean that a pattern can only ever be) 135.65 678.95 P
0.97 (recognised by those symbols and no others. In the SP61 model, a pattern can be) 135.65 664.95 P
(fully or partially recognised by any subsequence of its symbols.) 135.65 650.95 T
(\245) 99.65 634.95 T
0.79 (For each pattern which ends in a symbol containing the hash character \050\324#\325\051, this) 135.65 634.95 P
-0.31 (\324termination\325 symbol is added to the set of discrimination symbols for the pattern if) 135.65 620.95 P
(it is not otherwise there.) 135.65 606.95 T
-0.17 (The second constraint is not entirely satisfactory, not least because it conflicts with) 135.65 590.95 P
2.07 (the principle that all symbols should have the same status and none should have any) 99.65 576.95 P
3.16 (\324hidden\325 meaning. The constraint has been adopted to compensate for the fact that) 99.65 562.95 P
-0.49 (\324termination\325 symbols are largely redundant for the purpose of identifying patterns because) 99.65 548.95 P
0.93 (each one duplicates the information in the first symbol of the same pattern \050the primary) 99.65 534.95 P
-0.13 (function of \324termination\325 symbols is to mark the ends of patterns\051. This redundancy means) 99.65 520.95 P
1.53 (that, without the constraint, the method of calculating B) 99.65 506.95 P
1 10 Q
1.27 (E) 379.69 503.95 P
1 12 Q
1.53 ( \050see Section 5.5, below\051 can) 385.8 506.95 P
0.72 (sometimes fail to discriminate amongst alignments which clearly differ in their potential) 99.65 492.95 P
(for economical encoding.) 99.65 478.95 T
0 F
(5.3) 99.65 444.95 T
(Calculation of B) 135.65 444.95 T
0 10 Q
(N) 218.94 441.95 T
0 12 Q
( \050the number of bits required to express \324hit\325 symbols from) 226.16 444.95 T
(New in \324raw\325 form, without encoding\051) 135.65 428.62 T
1 F
(For any one alignment, B) 99.65 404.62 T
1 10 Q
(N) 221.91 401.62 T
1 12 Q
( is calculated as:) 229.12 404.62 T
-0.1 (where A) 99.65 345.62 P
2 10 Q
-0.08 (i) 140.51 342.62 P
1 12 Q
-0.1 ( is the actual cost of the symbol in New corresponding to the) 143.28 345.62 P
2 F
-0.1 (i) 436.11 345.62 P
1 F
-0.1 (th hit in a sequence) 439.45 345.62 P
-0.67 (of hits,) 99.65 331.62 P
2 F
-0.67 (H) 134.63 331.62 P
1 10 Q
-0.56 (1) 143.29 328.62 P
1 12 Q
-0.67 ( ...) 148.29 331.62 P
2 F
-0.67 (H) 161.94 331.62 P
2 10 Q
-0.56 (h) 170.6 328.62 P
1 12 Q
-0.67 (, with an adjustment to be described in the next paragraph. The hit sequence) 175.6 331.62 P
2 F
0.1 (H) 99.65 317.62 P
1 10 Q
0.08 (1) 108.31 314.62 P
1 12 Q
0.1 ( ...) 113.3 317.62 P
2 F
0.1 (H) 128.5 317.62 P
2 10 Q
0.08 (h) 137.16 314.62 P
1 12 Q
0.1 ( comprises the hits between symbols in New and symbols in patterns in Old. The) 142.15 317.62 P
-0.24 (symbols from New in this hit sequence are a subsequence of the sequence) 99.65 303.62 P
2 F
-0.24 (N) 453.95 303.62 P
1 10 Q
-0.2 (1) 461.95 300.62 P
1 12 Q
-0.24 ( ...) 466.94 303.62 P
2 F
-0.24 (N) 481.46 303.62 P
2 8 Q
-0.16 (n) 489.46 301.12 P
1 10 Q
-0.2 (, which is) 493.45 303.62 P
(the pattern in New.) 99.65 289.62 T
0 12 Q
(5.3.1) 99.65 255.62 T
(The \324cost factor\325 and the \324actual cost\325 of symbols) 135.65 255.62 T
1 F
-0.14 (The actual cost of a symbol \050described in Section 5.1\051 is used) 99.65 231.62 P
2 F
-0.14 (only) 395.45 231.62 P
1 F
-0.14 ( when it appears in New) 416.1 231.62 P
2.14 (and its sole purpose is for the calculation of B) 99.65 217.62 P
1 10 Q
1.78 (N) 339.08 214.62 P
1 12 Q
2.14 (, as described above. The reason for) 346.3 217.62 P
1.55 (introducing this idea and the cost factor described in the same section is that we must) 99.65 203.62 P
-0.67 (suppose that each symbol in New is an analogue of a relatively large \324chunk\325 of information) 99.65 189.62 P
1.66 (\050e.g., a section of the sound wave of speech\051 while the corresponding symbol when it) 99.65 175.62 P
4.51 (appears in Old is an analogue of a relatively short \324code\325 which represents the) 99.65 161.62 P
(corresponding chunk of information in New.) 99.65 147.62 T
-0.63 (In general, codes must be smaller than the things they encode. If this does not obtain) 135.65 131.62 P
(then, clearly, no compression can be achieved.) 99.65 117.62 T
99.65 96.95 531.65 744.95 C
100 357.62 531.29 400.62 C
1 12 Q
0 X
0 K
(B) 139.76 376.78 T
1 10 Q
(n) 147.76 373.78 T
1 12 Q
(=) 155.25 376.78 T
5 F
(\345A) 165.01 376.78 T
2 10 Q
(i) 182.22 373.78 T
2 12 Q
(i) 159.21 363.78 T
1 F
(=) 162.54 363.78 T
1 10 Q
(1) 169.31 363.78 T
2 F
(h) 164.26 390.78 T
99.65 96.95 531.65 744.95 C
-8.35 24.95 603.65 816.95 C
FMENDPAGE
%%EndPage: "22" 23
%%Page: "23" 23
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 23 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 F
0 X
(5.4) 99.65 736.95 T
(Allowing for Gaps) 135.65 736.95 T
1 F
-0.3 (Before the above formula is applied, the value of each A) 99.65 712.95 P
2 10 Q
-0.25 (i) 368.09 709.95 P
1 12 Q
-0.3 ( is adjusted to take account of any) 370.87 712.95 P
0.23 (\324gap\325 which may exist between the given hit and any previous hits in the sequence of hits) 99.65 698.95 P
0.64 (between New and patterns in Old. For this purpose, the alignment is treated as if it were) 99.65 684.95 P
1.38 (two sequences of symbols: the sequence of symbols which is New \050the sentence being) 99.65 670.95 P
-0 (parsed\051 and the sequence of symbols which is the projection of the alignment into a single) 99.65 656.95 P
(sequence.) 99.65 642.95 T
2.55 (As indicated above, there is insufficient space to present fully the method of) 135.65 626.95 P
-0.38 (allowing for gaps. In outline, it is based on an analogy with the rolling of two A-sided dice,) 99.65 612.95 P
0.12 (where A is the size of the alphabet used in New and Old. The sequence of rolls of one die) 99.65 598.95 P
-0.44 (corresponds with the sequence of symbols in New and the sequence of rolls of the other die) 99.65 584.95 P
0.43 (corresponds with the sequence of symbols in the projection. The method is based closely) 99.65 570.95 P
2.86 (on the method described in [23] \050Chapter 3\051 for calculating probabilities of various) 99.65 556.95 P
(contingencies in problems of this type.) 99.65 542.95 T
0.31 (For the symbol corresponding to the) 135.65 526.95 P
2 F
0.31 (i) 314.71 526.95 P
1 F
0.31 (th hit in the sequence) 318.04 526.95 P
2 F
0.31 (H) 424.5 526.95 P
1 10 Q
0.25 (1) 433.16 523.95 P
1 12 Q
0.31 ( ...) 438.15 526.95 P
2 F
0.31 (H) 453.76 526.95 P
2 10 Q
0.25 (h) 462.42 523.95 P
1 12 Q
0.31 (, the adjusted) 467.41 526.95 P
(value of A) 99.65 512.95 T
2 10 Q
(i) 150.27 509.95 T
1 12 Q
( is calculated as:) 153.05 512.95 T
0.24 (where, a) 99.65 470.95 P
2 10 Q
0.2 (i) 140.51 467.95 P
1 12 Q
0.24 ( is the actual cost of the symbol corresponding to the) 143.29 470.95 P
2 F
0.24 (i) 402.13 470.95 P
1 F
0.24 (th hit in) 405.46 470.95 P
2 F
0.24 (H) 446.51 470.95 P
1 10 Q
0.2 (1) 455.17 467.95 P
1 12 Q
0.24 ( ....) 460.17 470.95 P
2 F
0.24 (H) 478.64 470.95 P
2 10 Q
0.2 (h) 487.3 467.95 P
1 12 Q
0.24 (, and G) 492.3 470.95 P
2 10 Q
0.2 (s) 527.76 467.95 P
1 12 Q
-0.29 (is the) 99.65 456.95 P
2 F
-0.29 (s) 127.71 456.95 P
1 F
-0.29 (th entry in a table of \324scaling factors\325 which is calculated at the outset of processing.) 132.38 456.95 P
(The value of G) 99.65 442.95 T
1 10 Q
(1) 171.92 439.95 T
1 12 Q
( is always 1.) 176.92 442.95 T
0.29 (For each hit in) 135.65 426.95 P
2 F
0.29 (H) 209.43 426.95 P
1 10 Q
0.24 (1) 218.09 423.95 P
1 12 Q
0.29 ( ...) 223.08 426.95 P
2 F
0.29 (. H) 235.37 426.95 P
2 10 Q
0.24 (h) 250.31 423.95 P
1 12 Q
0.29 ( after the first, the variable) 255.31 426.95 P
2 F
0.29 (s) 387.27 426.95 P
1 F
0.29 ( \050which represents the \324span\325) 391.94 426.95 P
(between the current hit in) 99.65 412.95 T
2 F
(H) 225.22 412.95 T
1 10 Q
(1) 233.88 409.95 T
1 12 Q
( ...) 238.88 412.95 T
2 F
(H) 253.87 412.95 T
2 10 Q
(h) 262.53 409.95 T
1 12 Q
( and the preceding hit\051 is calculated as:) 267.52 412.95 T
0.11 (where P) 99.65 367.95 P
2 10 Q
0.09 (i) 138.72 364.95 P
1 12 Q
0.11 ( is the position in) 141.5 367.95 P
2 F
0.11 (N) 227.66 367.95 P
1 10 Q
0.09 (1) 235.66 364.95 P
1 12 Q
0.11 ( ...) 240.66 367.95 P
2 F
0.11 ( N) 252.76 367.95 P
2 8 Q
0.07 (n) 263.86 365.45 P
1 12 Q
0.11 ( of the symbol corresponding to the) 267.86 367.95 P
2 F
0.11 (i) 442.16 367.95 P
1 F
0.11 (th hit in) 445.49 367.95 P
2 F
0.11 (H) 486.13 367.95 P
1 10 Q
0.09 (1) 494.79 364.95 P
1 12 Q
0.11 ( ...) 499.79 367.95 P
2 F
0.11 (H) 514.99 367.95 P
2 10 Q
0.09 (h) 523.65 364.95 P
1 12 Q
0.11 (,) 528.65 367.95 P
-0.28 (P) 99.65 353.95 P
2 10 Q
-0.24 (i) 106.32 350.95 P
1 F
-0.24 (-1) 109.1 350.95 P
1 12 Q
-0.28 ( is the position in) 117.42 353.95 P
2 F
-0.28 (N) 201.64 353.95 P
1 10 Q
-0.24 (1) 209.64 350.95 P
1 12 Q
-0.28 ( ...) 214.63 353.95 P
2 F
-0.28 (N) 229.06 353.95 P
2 8 Q
-0.19 (n) 237.06 351.45 P
1 12 Q
-0.28 ( of the symbol corresponding to the) 241.06 353.95 P
2 F
-0.28 (\050i) 412.63 353.95 P
1 F
-0.28 (-1\051th hit in) 419.96 353.95 P
2 F
-0.28 (H) 473.42 353.95 P
1 10 Q
-0.24 (1) 482.08 350.95 P
1 12 Q
-0.28 ( ...) 487.07 353.95 P
2 F
-0.28 (H) 501.5 353.95 P
2 10 Q
-0.24 (h) 510.16 350.95 P
1 12 Q
-0.28 (. C) 515.16 353.95 P
2 10 Q
-0.24 (i) 528.87 350.95 P
1 12 Q
2.46 (and C) 99.65 339.95 P
2 10 Q
2.05 (i) 130.43 336.95 P
1 F
2.05 (-1) 133.21 336.95 P
1 12 Q
2.46 ( are the analogous positions in the projection of the alignment into a single) 141.53 339.95 P
(sequence - which means that C) 99.65 325.95 T
2 10 Q
(i) 248.53 322.95 T
1 12 Q
( and C) 251.31 325.95 T
2 10 Q
(i) 282.63 322.95 T
1 F
(-1) 285.41 322.95 T
1 12 Q
( represent columns in the alignment itself.) 293.73 325.95 T
0 F
(5.5) 99.65 291.95 T
(Calculation of B) 135.65 291.95 T
0 10 Q
(E) 218.94 288.95 T
0 12 Q
( \050the number of bits required for the encoding of New) 225.61 291.95 T
(information in terms of Old information\051) 135.65 275.62 T
1 F
-0.18 (In the long run, the most satisfactory way of calculating B) 99.65 251.62 P
1 10 Q
-0.15 (E) 376.7 248.62 P
1 12 Q
-0.18 ( \050the number of bits required to) 382.81 251.62 P
-0.31 (encode an alignment\051 is simply to create an explicit code from the alignment in the manner) 99.65 237.62 P
0.5 (described in Sections 4.2.2, 4.2.3 and 4.2.4 and then simply add up the number of bits in) 99.65 223.62 P
(the symbols which make up the code.) 99.65 209.62 T
1.87 (The problem, at present, is that the kind of code just mentioned does not take) 135.65 193.62 P
-0.27 (account of gaps in a sequence of hits and the method of correcting for gaps which has been) 99.65 179.62 P
2.02 (adopted for the time being does not give rise to explicit codes. In Section 4.3 it was) 99.65 165.62 P
0.45 (suggested that, when the model has been generalised for \324learning\325, it may be possible to) 99.65 151.62 P
0.04 (encode gaps by means of explicit codes. If that turns out to be true, then the calculation of) 99.65 137.62 P
0.79 (B) 99.65 123.62 P
1 10 Q
0.66 (E) 107.65 120.62 P
1 12 Q
0.79 ( can certainly be done by adding up the bits in the symbols which make up the code.) 113.75 123.62 P
(Pending these developments, the method which is used for calculating B) 99.65 109.62 T
1 10 Q
(E) 447.76 106.62 T
1 12 Q
( is as follows.) 453.86 109.62 T
99.65 96.95 531.65 744.95 C
104.15 482.95 527.15 508.95 C
1 12 Q
0 X
0 K
(A) 138.15 493.95 T
2 10 Q
(i) 146.81 490.95 T
1 12 Q
( = a) 149.59 493.95 T
2 10 Q
(i) 167.67 490.95 T
5 12 Q
(\264) 173.45 493.95 T
1 F
(G) 180.03 493.95 T
2 10 Q
(s) 188.69 490.95 T
99.65 96.95 531.65 744.95 C
-8.35 24.95 603.65 816.95 C
99.65 96.95 531.65 744.95 C
101.65 379.95 529.65 408.95 C
2 12 Q
0 X
0 K
(s) 139.65 390.95 T
1 F
( = \050P) 144.31 390.95 T
2 10 Q
(i) 167.74 387.95 T
1 12 Q
( - P) 170.51 390.95 T
2 10 Q
(i) 187.17 387.95 T
1 F
(-1) 189.95 387.95 T
1 12 Q
(\051) 198.28 390.95 T
5 F
(\264) 205.27 390.95 T
1 F
( \050C) 211.85 390.95 T
2 10 Q
(i) 226.85 387.95 T
1 12 Q
( - C) 229.62 390.95 T
2 10 Q
(i) 247.62 387.95 T
1 F
(-1) 250.39 387.95 T
1 12 Q
(\051) 258.72 390.95 T
99.65 96.95 531.65 744.95 C
-8.35 24.95 603.65 816.95 C
FMENDPAGE
%%EndPage: "23" 24
%%Page: "24" 24
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 24 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(For each new alignment, the value of B) 135.65 736.95 T
1 10 Q
(E) 324.84 733.95 T
1 12 Q
( is:) 330.95 736.95 T
-0.62 (where E) 99.65 673.95 P
2 10 Q
-0.51 (i) 138.66 670.95 P
1 12 Q
-0.62 ( is the encoding cost of the Old pattern appearing on one of) 141.43 673.95 P
2 F
-0.62 (a) 420.86 673.95 P
1 F
-0.62 ( lines of the alignment) 426.85 673.95 P
0.39 (other than the top line \050where New appears\051 and S is the saving in encoding costs arising) 99.65 659.95 P
0.2 (from the fact that some patterns in the alignment convey information about the sequential) 99.65 645.95 P
1.54 (arrangement of other patterns in the alignment or the selection of other patterns in the) 99.65 631.95 P
(alignment where alternatives are possible in a given context.) 99.65 617.95 T
1.04 (The encoding cost of any pattern is the value of E for that pattern, calculated as) 135.65 601.95 P
2.04 (described in Section 5.2. Notice that if any pattern appears two or more times in the) 99.65 587.95 P
1.95 (alignment, its encoding cost is added a corresponding number of times to the sum of) 99.65 573.95 P
(encoding costs.) 99.65 559.95 T
(The calculation of B) 135.65 543.95 T
1 10 Q
(E) 233.91 540.95 T
1 12 Q
( depends on three main ideas:) 240.02 543.95 T
(\245) 99.65 527.95 T
3.49 (As previously noted, a pattern may be fully or partially recognised by any) 135.65 527.95 P
0.83 (subsequence of the pattern. In other words, it is not necessary to use the specific) 135.65 513.95 P
-0.38 (symbols which were used in calculating the value of E for that pattern. As a general) 135.65 499.95 P
-0.47 (rule when the grammar is largely free of redundancy, if the M values of the relevant) 135.65 485.95 P
3.26 (symbols \050adjusted for gaps - see next\051 add up to the value of E, then that) 135.65 471.95 P
2.96 (subsequence of symbols will identify the pattern uniquely amongst the other) 135.65 457.95 P
-0.31 (patterns in the grammar. Where there is redundancy in the grammar, more bits may) 135.65 443.95 P
(be needed to achieve unique identification of a pattern.) 135.65 429.95 T
(\245) 99.65 413.95 T
1.95 (If there are gaps in a sequence of hits, information values must be reduced in) 135.65 413.95 P
(accordance with the rules used in calculating the value of B) 135.65 399.95 T
1 10 Q
(N) 421.43 396.95 T
1 12 Q
( \050Section 5.4\051) 428.64 399.95 T
(\245) 99.65 381.62 T
0.15 (For present purposes, there is nothing to be gained by over-specifying a pattern. If) 135.65 381.62 P
2.41 (one pattern matches a second pattern by the minimum number of symbols to) 135.65 367.62 P
-0.45 (achieve identification of that second pattern, then the saving in encoding costs from) 135.65 353.62 P
-0.67 (this source is maximal. Any additional hits between the two patterns do not give any) 135.65 339.62 P
(additional saving in encoding costs.) 135.65 325.62 T
(B) 135.65 309.62 T
1 10 Q
(E) 143.65 306.62 T
1 12 Q
( is calculated in the following way:) 149.75 309.62 T
(1) 99.65 293.62 T
1 (For each row \050R\051 in the alignment corresponding to a pattern from Old, create a) 135.65 293.62 P
(variable \050V\051 containing the value of E for the pattern in that row.) 135.65 279.62 T
(2) 99.65 263.62 T
-0.03 (Traverse the alignment from left to right examining the columns containing two or) 135.65 263.62 P
0.27 (more symbols \050including symbols in New\051. Any such column is designated a \324hit\325) 135.65 249.62 P
(column \050C) 135.65 235.62 T
1 10 Q
(H) 186.62 232.62 T
1 12 Q
(\051.) 193.84 235.62 T
(3) 99.65 217.29 T
0.17 (For each C) 135.65 217.29 P
1 10 Q
0.14 (H) 188.62 214.29 P
1 12 Q
0.17 ( which contains two or more symbols from patterns in Old \050which we) 195.83 217.29 P
0.96 (may designate C) 135.65 200.95 P
1 10 Q
0.8 (HO) 217.52 197.95 P
1 12 Q
0.96 (\051, examine each row which has a hit symbol from Old in the) 231.96 200.95 P
-0.2 (column \050designated R) 135.65 184.62 P
1 10 Q
-0.17 (HO) 240.51 181.62 P
1 12 Q
-0.2 (\051. For this symbol, calculate M) 254.95 184.62 P
1 10 Q
-0.17 (A) 401.53 181.62 P
1 12 Q
-0.2 (, an \324adjusted\325 value of M) 408.74 184.62 P
-0.08 (for the symbol, taking account of any gap which may exist between the given C) 135.65 168.29 P
1 10 Q
-0.07 (HO) 517.22 165.29 P
1 12 Q
0.11 (and any previous C) 135.65 151.95 P
1 10 Q
0.09 (H) 228.92 148.95 P
1 12 Q
0.11 (. The method of making the adjustment is the same as is used) 236.14 151.95 P
-0.31 (for calculating the value of B) 135.65 135.62 P
1 10 Q
-0.26 (N) 274.31 132.62 P
1 12 Q
-0.31 ( \050Section 5.4\051 except that, for each R) 281.53 135.62 P
1 10 Q
-0.26 (HO) 455.54 132.62 P
1 12 Q
-0.31 (, the gaps \050or) 469.97 135.62 P
1.21 (spans\051 are measured as if all the rows in the alignment) 135.65 119.29 P
2 F
1.21 (except) 412.44 119.29 P
1 F
1.21 ( the given R) 443.07 119.29 P
1 10 Q
1.01 (HO) 505.01 116.29 P
1 12 Q
1.21 ( is) 519.44 119.29 P
99.65 96.95 531.65 744.95 C
102.15 685.95 529.15 732.95 C
1 12 Q
0 X
0 K
(B) 172.15 708.62 T
1 10 Q
(E) 180.15 705.62 T
1 12 Q
( =) 186.25 708.62 T
5 F
(\345 E) 199.01 708.62 T
2 10 Q
(i) 217.89 705.62 T
1 12 Q
( - S) 220.67 708.62 T
2 F
(i) 193.15 696.62 T
1 F
(=2) 196.48 696.62 T
2 F
(a) 197.15 722.62 T
99.65 96.95 531.65 744.95 C
-8.35 24.95 603.65 816.95 C
FMENDPAGE
%%EndPage: "24" 25
%%Page: "25" 25
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 25 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
-0.48 (treated as if it were a single pattern to which the pattern in the given R) 135.65 736.95 P
1 10 Q
-0.4 (HO) 465.87 733.95 P
1 12 Q
-0.48 ( is aligned.) 480.3 736.95 P
0.58 (As in the calculation of B) 135.65 720.62 P
1 10 Q
0.49 (N) 261.49 717.62 P
1 12 Q
0.58 (, it is assumed that there is no gap associated with the) 268.71 720.62 P
(first C) 135.65 704.29 T
1 10 Q
(H) 165.97 701.29 T
1 12 Q
( for any given pattern.) 173.18 704.29 T
(4) 99.65 685.95 T
0.31 (For each C) 135.65 685.95 P
1 10 Q
0.26 (HO) 188.9 682.95 P
1 12 Q
0.31 (, examine each R) 203.33 685.95 P
1 10 Q
0.26 (HO) 286.86 682.95 P
1 12 Q
0.31 ( and, amongst these rows, identify the \324leading\325) 301.29 685.95 P
-0.41 (row, R) 135.65 669.62 P
1 10 Q
-0.34 (HOL) 167.88 666.62 P
1 12 Q
-0.41 (, whose pattern starts furthest to the left in the alignment \050if there is a tie,) 188.42 669.62 P
0.28 (make an arbitrary choice amongst the ties\051. For example, in Figure 4, for either of) 135.65 653.29 P
0.34 (the two columns which contains a hit between \324) 135.65 639.29 P
4 F
0.82 (D) 366.9 639.29 P
1 F
0.34 (\325 in) 374.1 639.29 P
4 F
0.82 (\324D 0 t h i s #D) 394.11 639.29 P
1 F
0.34 (\325 and) 506.99 639.29 P
-0.19 (\324) 135.65 625.29 P
4 F
-0.45 (D) 139.64 625.29 P
1 F
-0.19 (\325 in \324) 146.84 625.29 P
4 F
-0.45 (NP D #D N #N #NP) 169.78 625.29 P
1 F
-0.19 (\325, the R) 282.65 625.29 P
1 10 Q
-0.16 (HOL) 317.92 622.29 P
1 12 Q
-0.19 ( is the one containing \324) 338.45 625.29 P
4 F
-0.45 (NP D #D N #N) 447.11 625.29 P
1.69 (#NP) 135.65 608.95 P
1 F
0.71 (\325 \050row 4 in the first case and row 9 in the second case\051; for either of the two) 157.24 608.95 P
-0.05 (columns containing a hit between \324) 135.65 594.95 P
4 F
-0.13 (NP) 303.59 594.95 P
1 F
-0.05 (\325 in \324) 317.98 594.95 P
4 F
-0.13 (NP D #D N #N #NP) 341.19 594.95 P
1 F
-0.05 (\325 and \324) 455.66 594.95 P
4 F
-0.13 (NP) 486.86 594.95 P
1 F
-0.05 (\325 in \324) 501.25 594.95 P
4 F
-0.13 (S) 524.45 594.95 P
0.49 (NP #NP V #V NP #NP #S) 135.65 580.95 P
1 F
0.2 (\325, the R) 289.69 580.95 P
1 10 Q
0.17 (HOL) 325.74 577.95 P
1 12 Q
0.2 ( is the row containing \324) 346.28 580.95 P
4 F
0.49 (S NP #NP V) 458.22 580.95 P
(#V NP #NP #S) 135.65 564.62 T
1 F
(\325 \050row 6 in both cases\051.) 222 564.62 T
(5) 99.65 548.62 T
1.55 (For each C) 135.65 548.62 P
1 10 Q
1.29 (HO) 191.38 545.62 P
1 12 Q
1.55 (, consider, in turn, each R) 205.81 548.62 P
1 10 Q
1.29 (HO) 336.81 545.62 P
1 12 Q
1.55 (,) 351.24 548.62 P
2 F
1.55 (excluding the) 358.79 548.62 P
1 F
1.55 (R) 429.18 548.62 P
1 10 Q
1.29 (HOL) 437.18 545.62 P
1 12 Q
1.55 (. For each row) 457.72 548.62 P
0.49 (considered, subtract the value of M) 135.65 532.29 P
1 10 Q
0.4 (A) 307.27 529.29 P
1 12 Q
0.49 ( from the value of V for that row. If the new) 314.49 532.29 P
0.03 (value of V is less than 0, V is set to 0 and no further subtraction from that instance) 135.65 515.95 P
(of V is allowed.) 135.65 501.95 T
(6) 99.65 485.95 T
0.46 (When all relevant columns have been examined and the values of the V variables) 135.65 485.95 P
(have been reduced, calculate) 135.65 471.95 T
-0.34 (where) 134.65 407.95 P
2 F
-0.34 (a) 166.61 407.95 P
1 F
-0.34 ( is the number of rows in the alignment and the summation excludes the top) 172.6 407.95 P
(line \050which contains New\051.) 135.65 393.95 T
0.3 (The rationale for this method of calculating B) 135.65 377.95 P
1 10 Q
0.25 (E) 357.28 374.95 P
1 12 Q
0.3 ( is that it gives us the sum of the E) 363.38 377.95 P
0.16 (values of the patterns from Old corresponding to each row of the alignment after the first,) 99.65 363.95 P
0.64 (with a reduction for hits between those patterns \050with an adjustment for gaps as outlined) 99.65 349.95 P
(above\051.) 99.65 335.95 T
-0.45 (The reason for reducing the value of B) 135.65 319.95 P
1 10 Q
-0.38 (E) 317.99 316.95 P
1 12 Q
-0.45 ( when there are hits between patterns in Old) 324.1 319.95 P
-0.16 (is that any such hit reflects a degree of \324coverage\325 of one pattern from Old by another such) 99.65 305.95 P
1.76 (pattern. To the extent that one pattern provides information that also exists in another) 99.65 291.95 P
-0.45 (pattern there is a reduced need for the second pattern to be identified in the encoding. In the) 99.65 277.95 P
0.27 (extreme case, where two patterns are identical, only one of them need be identified in the) 99.65 263.95 P
(encoding.) 99.65 249.95 T
-0.64 (As indicated above, any saving in encoding costs resulting from the coverage of one) 135.65 233.95 P
-0.35 (or more patterns by another cannot exceed the E value for each pattern - any additional hits) 99.65 219.95 P
(are \324wasted\325. Hence, the V value for any row cannot be reduced below 0.) 99.65 205.95 T
1 (In the method described above, the \324leading\325 row for any one column \050R) 135.65 189.95 P
1 10 Q
0.84 (HOL) 495.11 186.95 P
1 12 Q
1 (\051 is) 515.65 189.95 P
-0.31 (regarded as the row with which the other symbols in the column are unified. Hence, for the) 99.65 175.95 P
1.96 (given column, this is the row where the V value is) 99.65 161.95 P
2 F
1.96 (not) 365.09 161.95 P
1 F
1.96 ( reduced by the value of M) 380.42 161.95 P
1 10 Q
1.64 (A) 521.43 158.95 P
1 12 Q
1.96 (.) 528.65 161.95 P
2.05 (Intuitively, the left-to-right bias in the definition of \324leading row\325 is less theoretically) 99.65 147.95 P
0.46 (\324clean\325 than if all concepts were entirely symmetrical between left and right directions in) 99.65 133.95 P
0.44 (the alignment. However, the concepts as described are the best to have been found so far) 99.65 119.95 P
(and seem to work quite well.) 99.65 105.95 T
99.65 96.95 531.65 744.95 C
100 419.95 531.29 467.95 C
1 12 Q
0 X
0 K
(B) 172.15 440.24 T
1 10 Q
(E) 180.15 437.24 T
1 12 Q
( =) 186.25 440.24 T
5 F
(\345) 199.01 440.24 T
1 F
(V) 210.57 440.24 T
2 10 Q
(i) 219.22 437.24 T
2 12 Q
(i) 193.9 426.75 T
1 F
(=2) 197.24 426.75 T
2 F
(a) 198.9 452.75 T
99.65 96.95 531.65 744.95 C
-8.35 24.95 603.65 816.95 C
FMENDPAGE
%%EndPage: "25" 26
%%Page: "26" 26
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 26 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 F
0 X
(6) 99.65 736.95 T
(The SP61 model) 135.65 736.95 T
1 F
-0.44 (It is not appropriate to present a detailed description of the SP61 model here since the SP61) 99.65 712.95 P
-0.36 (model is similar to the SP52 model and a fairly full description of that model may be found) 99.65 698.95 P
0.15 (in [42]. The main difference between the two models is that SP61 has been generalised to) 99.65 684.95 P
0.28 (calculate probabilities of inferences. Also, in the development of the model, a fairly large) 99.65 670.95 P
(number of minor refinements were made.) 99.65 656.95 T
-0.65 (At the heart of both models is a process for finding alignments between two patterns) 135.65 640.95 P
0.04 (which are \324good\325 in terms of IC. An early version of this method for aligning two patterns) 99.65 626.95 P
(is described in [48] and a more refined version in [50].) 99.65 612.95 T
-0.51 (The process may be regarded as a form of dynamic programming \050see, for example,) 135.65 596.95 P
([30]\051 which differs from standard methods in two main ways:) 99.65 582.95 T
(\245) 99.65 566.95 T
3.54 (It can find alternative alignments between two patterns, graded in terms of) 135.65 566.95 P
(compression.) 135.65 552.95 T
(\245) 99.65 536.95 T
2.96 (It exploits list processing techniques so that arbitrarily long patterns may be) 135.65 536.95 P
(compared \050within the limits of the host machine\051.) 135.65 522.95 T
(\245) 99.65 506.95 T
1.46 (The thoroughness of searching, and thus the computational resources which are) 135.65 506.95 P
(required \050time or space or both\051, may be controlled with parameters.) 135.65 492.95 T
0.81 (For each pattern in New \050and there is normally only one\051, this process is applied) 135.65 476.95 P
1.44 (initially to compare the pattern in New with all the patterns in Old. From amongst the) 99.65 462.95 P
1.28 (alignments which are found, the best ones are \324unified\325 to convert the alignment into a) 99.65 448.95 P
0.69 (single sequence or pattern of symbols. This pattern is stored together with the alignment) 99.65 434.95 P
2.28 (from which it was derived. Any alignments which cannot be unified in this way are) 99.65 420.95 P
(discarded.) 99.65 406.95 T
-0.49 (For each pattern amongst the best of the unified alignments just formed, the process) 135.65 390.95 P
-0.34 (is repeated, the best alignments are unified to form simple patterns and these patterns \050with) 99.65 376.95 P
-0.29 (their alignments\051 are stored as before. This cycle is repeated in the same way until no more) 99.65 362.95 P
(alignments can be found.) 99.65 348.95 T
2.16 (The organisation of the model is outlined in Figure 6 using pseudocode. Two) 135.65 332.95 P
0.31 (aspects of the model, as described in the figure, need some clarification: the nature of the) 99.65 318.95 P
0.66 (\324hit structure\325 used in the model and the method by which alignments are selected at the) 99.65 304.95 P
(end of each) 99.65 290.95 T
2 F
(compress\050\051) 157.92 290.95 T
1 F
( cycle.) 211.21 290.95 T
FMENDPAGE
%%EndPage: "26" 27
%%Page: "27" 27
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 27 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 F
0 X
(main\050\051) 135.65 736.95 T
({) 135.65 721.95 T
1 F
(1 Read the rules \050patterns\051 of the knowledge base, each one with) 171.65 706.95 T
(a frequency of occurrence in an actual or notional sample) 207.65 691.95 T
-0.25 (from the World, and store the patterns with their frequencies in Old.) 207.65 676.95 P
(2 Read the pattern\050s\051 to be parsed and store it \050them\051 in New.) 171.65 661.95 T
-0.44 (3 From the frequency of each pattern, derive a frequency for each symbol in) 171.65 646.95 P
(Old \050as described in Section 5.1\051) 207.65 631.95 T
(4 Using the frequencies of the symbols with the method described in the) 171.65 616.95 T
(text \050Section 5.1\051, assign to each symbol type) 207.65 601.95 T
(in New and Old a number of bits representing the) 207.65 586.95 T
(\324minimum\325 information \324cost\325 of that symbol type. Also, calculate) 207.65 571.95 T
(an \324actual\325 information cost for each symbol type \050see Section 5.1\051.) 207.65 556.95 T
(5 For each pattern in Old, calculate E, the minimum number of bits needed) 171.65 541.95 T
(to encode that pattern \050see Section 5.2\051.) 207.65 526.95 T
(7) 171.65 511.95 T
0 F
(while) 180.64 511.95 T
(\050) 210.96 511.95 T
1 F
(there are more patterns in New to be encoded) 214.96 511.95 T
0 F
(\051) 432.77 511.95 T
({) 171.65 496.95 T
1 F
-0.47 (7.1 Select the first or next pattern in New to be encoded and add it as) 207.65 481.95 P
(the first \324driving pattern\325 to an otherwise empty list of) 243.65 466.95 T
(driving patterns.) 243.65 451.95 T
(7.2) 207.65 436.95 T
0 F
(while \050) 225.64 436.95 T
1 F
(there are more windows in the current pattern) 259.95 436.95 T
0 F
(\051) 478.77 436.95 T
({) 207.65 421.95 T
1 F
(7.1.1 Select the first or next \324window\325 from the) 243.65 406.95 T
(current pattern from New.) 279.65 391.95 T
(7.2.2) 243.65 376.95 T
0 F
(while \050) 270.63 376.95 T
1 F
(new alignments are being formed) 304.95 376.95 T
0 F
(\051) 465.49 376.95 T
(compress \050\051) 279.65 361.95 T
1 F
(7.2.3 Out of all the alignments which have been formed) 243.65 346.95 T
(for the current pattern) 279.65 331.95 T
2 F
(up and including the) 387.55 331.95 T
(current window) 279.65 316.95 T
1 F
(, print the ones that have been) 355.28 316.95 T
(selected by) 279.65 301.95 T
2 F
(compress\050\051) 336.27 301.95 T
1 F
(.) 389.56 301.95 T
0 F
(}) 207.65 286.95 T
1 F
(7.3 Out of all the new alignments which have been formed for) 207.65 271.95 T
(the entire current pattern from New, print the ones that) 243.65 256.95 T
(have been selected by) 243.65 241.95 T
2 F
(compress\050\051) 351.55 241.95 T
1 F
(.) 404.84 241.95 T
-0.22 (7.4 Identify a) 207.65 226.95 P
2 F
-0.22 (reference set) 274.27 226.95 P
1 F
-0.22 ( of alignments which encode all and only) 335.66 226.95 P
-0.5 (the same symbols from New as the best alignment. Calculate) 243.65 211.95 P
(absolute and relative probabilities for these alignments and) 243.65 196.95 T
(for the patterns and symbols in the alignment.) 243.65 181.95 T
0 F
(}) 171.65 166.95 T
(}) 135.65 151.95 T
1 F
(\050continued\051) 99.65 121.95 T
FMENDPAGE
%%EndPage: "27" 28
%%Page: "28" 28
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 28 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(Figure 6 \050continued\051) 99.65 736.95 T
0 F
(compress\050\051) 135.65 713.95 T
({) 135.65 698.95 T
1 F
(1 Clear the \324hit structure\325 \050described in the text\051.) 171.65 683.95 T
(2) 171.65 668.95 T
0 F
( while \050) 177.65 668.95 T
1 F
(there are driving patterns which have not yet been processed) 214.96 668.95 T
0 F
(\051) 505.05 668.95 T
({) 171.65 653.95 T
1 F
(2.1 Select the first or next driving pattern in the set of) 207.65 638.95 T
(driving patterns.) 243.65 623.95 T
(2.2) 207.65 608.95 T
0 F
( while) 222.64 608.95 T
(\050) 255.96 608.95 T
1 F
(there are more symbols in the current driving pattern) 259.95 608.95 T
(or) 516.09 608.95 T
(window of the current pattern from New) 243.65 593.95 T
0 F
(\051) 438.16 593.95 T
({) 207.65 578.95 T
1 F
(2.2.1 Working left to right through the current) 243.65 563.95 T
(driving pattern \050or window if the current) 279.65 548.95 T
(driving pattern is from New\051, select the first or) 279.65 533.95 T
(next symbol in the pattern.) 279.65 518.95 T
(2.2.2 \324Broadcast\325 this symbol to make a yes/no match with) 243.65 503.95 T
(every symbol in the \324target patterns\325: the original) 279.65 488.95 T
(patterns in Old and the alignments in the store of) 279.65 473.95 T
(alignments formed thus far by the program.) 279.65 458.95 T
-0.44 (2.2.3 Record each positive match \050) 243.65 443.95 P
2 F
-0.44 (hit) 408.35 443.95 P
1 F
-0.44 (\051 in a \324hit structure\325 \050as) 421.02 443.95 P
(described in the text\051. As more symbols are) 279.65 428.95 T
(broadcast, the hit structure builds up a record) 279.65 413.95 T
(of sequences of hits between the driving pattern) 279.65 398.95 T
(and the several target patterns in Old and the store) 279.65 383.95 T
(of alignments. As each hit sequence is extended,) 279.65 368.95 T
(the compression score of the corresponding) 279.65 353.95 T
(alignment is estimated using a \324cheap to) 279.65 338.95 T
(compute\325 method of estimation.) 279.65 323.95 T
(2.2.4 If the space allocated for the hit structure is filled at) 243.65 308.95 T
(any time, the system \324purges\325 the worst hit) 279.65 293.95 T
(sequences from the hit structure to release) 279.65 278.95 T
(more space. The selection uses the estimates) 279.65 263.95 T
(of compression scores assigned to each) 279.65 248.95 T
(hit sequence in Step 2.2.3.) 279.65 233.95 T
0 F
(}) 207.65 218.95 T
(}) 171.65 203.95 T
1 F
(\050continued\051) 99.65 173.95 T
FMENDPAGE
%%EndPage: "28" 29
%%Page: "29" 29
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 29 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(Figure 6 \050continuation of the) 99.65 736.95 T
2 F
(compress\050\051) 240.56 736.95 T
1 F
( function\051) 293.85 736.95 T
(3 For each hit sequences which has an estimated compression score above) 171.65 713.95 T
-0.51 (some threshold value and which will \324project\325 into a single sequence) 207.65 698.95 P
(\050as described in the text\051, convert the hit sequence into) 207.65 683.95 T
(the corresponding alignment. Discard this alignment if it is) 207.65 668.95 T
(identical with any alignment already in Old. Otherwise, compute) 207.65 653.95 T
(the compression score using the method described in Section 5,) 207.65 638.95 T
(print the new alignment and add it to the store of alignments) 207.65 623.95 T
(created by the program. If no new alignments are formed,) 207.65 608.95 T
(quit the) 207.65 593.95 T
2 F
(compress\050\051) 246.96 593.95 T
1 F
(function.) 303.25 593.95 T
-0.16 (4 Examine all the alignments that have been created since the beginning of) 171.65 578.95 P
(processing and choose a \324best\325 subset of these alignments as) 207.65 563.95 T
(described in the text. Remove from the store of alignments) 207.65 548.95 T
(all the alignments which have not been selected.) 207.65 533.95 T
(5 Clear the list of driving patterns and then, using the same method) 171.65 518.95 T
(as is used in 4 but \050usually\051 with a more restrictive parameter,) 207.65 503.95 T
(select a subset of the alignments remaining in the store of) 207.65 488.95 T
(alignments and add references to those alignments to the) 207.65 473.95 T
(list of driving patterns for the next cycle. \050these patterns are) 207.65 458.95 T
(not removed from the store of alignments and may therefore) 207.65 443.95 T
(also be target patterns on the next cycle\051.) 207.65 428.95 T
0 F
(}) 135.65 413.95 T
(Figure 6) 135.65 389.95 T
1 F
(An outline, in pseudocode, of the organisation of the SP61 model.) 135.65 373.95 T
0 F
(6.1) 99.65 349.95 T
(The \324hit structure\325) 135.65 349.95 T
1 F
0.6 (On each application of the) 99.65 325.95 P
2 F
0.6 (compress\050\051) 232.2 325.95 P
1 F
0.6 ( function shown in Figure 6, a set of alignments is) 285.48 325.95 P
1.03 (identified, each one between a \324driving\325 pattern and a \324target\325 pattern. Since the driving) 99.65 311.95 P
0.02 (pattern or the target pattern or both may represent an alignment formed at an earlier stage,) 99.65 297.95 P
(the model can build alignments with any number of rows.) 99.65 283.95 T
0.46 (The) 135.65 267.95 P
2 F
0.46 (hit structure) 157.75 267.95 P
1 F
0.46 ( is the data structure used to record hits between driving patterns) 217.18 267.95 P
0.65 (and target patterns. In effect, it is a set of lists where each list records a sequence of hits) 99.65 253.95 P
1.57 (between one driving pattern and one target pattern. To save space and to facilitate the) 99.65 239.95 P
0.49 (process of updating the set of lists, a tree structure is used where each path from the root) 99.65 225.95 P
0.35 (node to a leaf node records a sequence of hits between one driving pattern and one target) 99.65 211.95 P
(pattern.) 99.65 197.95 T
0 F
(6.2) 99.65 163.95 T
(Selection of alignments at the end of each application of the) 135.65 163.95 T
3 F
(compress\050\051) 441.8 163.95 T
0 F
(function) 135.65 149.95 T
1 F
2.42 (In Step 4 of the) 99.65 125.95 P
2 F
2.42 (compress\050\051) 188.72 125.95 P
1 F
2.42 ( function shown in Figure 6, alignments are selected for) 242.01 125.95 P
0.42 (retention in the store of alignments and the rest are discarded. The process of selection is) 99.65 111.95 P
FMENDPAGE
%%EndPage: "29" 30
%%Page: "30" 30
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 30 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
-0.38 (not simply a matter of choosing the alignments with the highest CD values. This is because) 99.65 736.95 P
-0.18 (it often happens that a majority of the alignments, often with relatively high CD values, all) 99.65 722.95 P
3.53 (represent encodings of the same symbols from within New and a minority of the) 99.65 708.95 P
-0.03 (alignments, which may have relatively low CD values, encode symbols from elsewhere in) 99.65 694.95 P
1.71 (New. If selection was to be made purely on the strength of high CD value then these) 99.65 680.95 P
-0.34 (\324minority\325 alignments would be lost. Since they are often stepping stones on the path to the) 99.65 666.95 P
-0.62 (building of alignments which encode all the symbols in New, removing them from the store) 99.65 652.95 P
(of alignments may prevent the system ever finding these \324full\325 alignments for New.) 99.65 638.95 T
0.37 (The solution to this problem is to assign a \324quota\325 to each of the symbols in New,) 135.65 622.95 P
1.42 (the same value for all the symbols. Selections are then made for each symbol in New,) 99.65 608.95 P
-0.7 (choosing alignments where the given symbol forms a hit within the alignment and, amongst) 99.65 594.95 P
0.24 (those alignments, choosing the ones with the highest CDs until the quota is filled or there) 99.65 580.95 P
0.77 (are no more suitable alignments. Any one alignment may appear in the quota for two or) 99.65 566.95 P
0.66 (more symbols from New. All alignments which have not been assigned to any quota are) 99.65 552.95 P
(discarded.) 99.65 538.95 T
0.05 (The overall effect of this method of selection is to \324protect\325 alignments which may) 135.65 522.95 P
1.95 (have low CD values but which are needed as stepping stones on the way to building) 99.65 508.95 P
0.31 (alignments which \324encode\325 all or most of the symbols in New and which have higher CD) 99.65 494.95 P
(values than would otherwise be obtained.) 99.65 480.95 T
1.35 (The same method is used in Step 5 of the) 135.65 464.95 P
2 F
1.35 (compress\050\051) 349.7 464.95 P
1 F
1.35 ( function to select driving) 402.99 464.95 P
0.76 (patterns for use in the next cycle. When driving patterns are selected, the quota for each) 99.65 450.95 P
-0.26 (symbol from New is, typically, smaller than when alignments are selected to be retained in) 99.65 436.95 P
-0.18 (the store of alignments. Normally, this means that the driving patterns chosen at the end of) 99.65 422.95 P
(each cycle are a subset of the alignments chosen to be retained at the end of each cycle.) 99.65 408.95 T
0 F
(6.3) 99.65 374.95 T
(Computational complexity) 135.65 374.95 T
1 F
-0.71 (Given that all the example grammars in this article are much smaller than would be required) 99.65 350.95 P
1.9 (in any realistic system, and given the well-known computational demands of multiple) 99.65 336.95 P
1.93 (alignment problems, readers may reasonably ask whether the proposed framework for) 99.65 322.95 P
0.8 (parsing would scale up to handle realistically large knowledge structures and patterns in) 99.65 308.95 P
(New.) 99.65 294.95 T
1.87 (It has been shown elsewhere [50, 49] that the time complexity of the function) 135.65 278.95 P
0.59 (\050SP21\051 for finding good partial matches between two patterns, which is embedded in the) 99.65 264.95 P
2 F
-0.45 (compress\050\051) 99.65 250.95 P
1 F
-0.45 ( function, is O\050) 152.93 250.95 P
2 F
-0.45 (nm) 224.22 250.95 P
1 F
-0.45 (\051 in a serial processing environment, where) 238.87 250.95 P
2 F
-0.45 (n) 445.59 250.95 P
1 F
-0.45 ( is the sum of the) 451.59 250.95 P
0.19 (lengths of the patterns in New and) 99.65 236.95 P
2 F
0.19 (m) 268.52 236.95 P
1 F
0.19 ( is the sum of the lengths of the patterns in Old. This) 277.18 236.95 P
1.58 (big O value is typical of systems which, like SP21, perform dynamic programming or) 99.65 222.95 P
2.76 (something similar. In a parallel processing environment, the time complexity should) 99.65 208.95 P
3.1 (approach O\050) 99.65 194.95 P
2 F
3.1 (n) 162.35 194.95 P
1 F
3.1 (\051,) 168.35 194.95 P
2 F
3.1 (n) 181.44 194.95 P
5 F
3.1 (\243) 193.53 194.95 P
2 F
3.1 ( m,) 200.11 194.95 P
1 F
3.1 (depending on how the parallel processing is applied) 223.96 194.95 P
2 F
3.1 (.) 494.47 194.95 P
1 F
3.1 (In all) 503.57 194.95 P
0.38 (environments, the space complexity is O\050) 99.65 180.95 P
2 F
0.38 (m) 300.76 180.95 P
1 F
0.38 (\051 - which is substantially better than the space) 309.41 180.95 P
(complexity of any \324standard\325 dynamic programming algorithm.) 99.65 166.95 T
-0.65 (In SP61, alignments are built by pairwise alignment of patterns and alignments. The) 135.65 150.95 P
-0.05 (number of such pairings required to build an alignment for a whole sentence appears to be) 99.65 136.95 P
0.31 (approximately log) 99.65 122.95 P
1 10 Q
0.25 (2) 187.57 119.95 P
1 12 Q
0.31 (\050) 195.32 122.95 P
2 F
0.31 (n) 199.31 122.95 P
1 F
0.31 ( /) 205.31 122.95 P
2 F
0.31 (c) 215.25 122.95 P
1 F
0.31 (\051, where) 220.58 122.95 P
2 F
0.31 (n) 263.48 122.95 P
1 F
0.31 ( is the length of the pattern in New \050in symbols\051 and) 269.48 122.95 P
2 F
0.31 (c) 526.32 122.95 P
1 F
(is a constant.) 99.65 108.95 T
FMENDPAGE
%%EndPage: "30" 31
%%Page: "31" 31
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 31 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
0.94 (The values of) 135.65 736.95 P
2 F
0.94 (n) 206.76 736.95 P
1 F
0.94 ( and) 212.76 736.95 P
2 F
0.94 (m) 237.96 736.95 P
1 F
0.94 ( for successive pairings will vary, starting relatively small) 246.62 736.95 P
-0.26 (when) 99.65 722.95 P
2 F
-0.26 (n) 128.37 722.95 P
1 F
-0.26 ( is the length of the pattern in New and) 134.37 722.95 P
2 F
-0.26 (m) 321.67 722.95 P
1 F
-0.26 ( is the sum of the lengths of the patterns in) 330.33 722.95 P
0.17 (the grammar and becoming larger when new alignments have been added to the store and) 99.65 708.95 P
2.57 (selected as driving patterns. However, the selection process described in Section 6.2) 99.65 694.95 P
2.12 (ensures that, after the initial growth, the sizes of) 99.65 680.95 P
2 F
2.12 (n) 352.19 680.95 P
1 F
2.12 ( and) 358.19 680.95 P
2 F
2.12 (m) 385.73 680.95 P
1 F
2.12 ( remain relatively constant:) 394.39 680.95 P
-0.45 (alignments in the later stages are longer than in the early stages but there are fewer of them.) 99.65 666.95 P
-0.28 (For the purpose of assessing computational complexity, it seems reasonable to assume that) 99.65 652.95 P
(variations in the sizes of) 99.65 638.95 T
2 F
(n) 219.23 638.95 T
1 F
( and) 225.23 638.95 T
2 F
(m) 248.55 638.95 T
1 F
( from the start to finish of processing can be discounted.) 257.2 638.95 T
-0.01 (Overall, the time complexity of SP61 in a serial processing environment should be) 135.65 622.95 P
-0.26 (approximately O\050log) 99.65 608.95 P
1 10 Q
-0.22 (2) 199.65 605.95 P
2 12 Q
-0.26 (n) 207.38 608.95 P
5 F
-0.26 (\327) 216.11 608.95 P
2 F
-0.26 (nm) 221.84 608.95 P
1 F
-0.26 (\051, where) 236.5 608.95 P
2 F
-0.26 (n) 278.26 608.95 P
1 F
-0.26 ( is the length of the sentence and) 284.26 608.95 P
2 F
-0.26 (m) 442.02 608.95 P
1 F
-0.26 ( is the sum of the) 450.68 608.95 P
0.71 (lengths of the patterns in Old. In a parallel processing environment, the time complexity) 99.65 594.95 P
-0.58 (may approach O\050log) 99.65 580.95 P
1 10 Q
-0.48 (2) 197.08 577.95 P
2 12 Q
-0.58 (n) 204.49 580.95 P
5 F
-0.58 (\327) 212.91 580.95 P
2 F
-0.58 (n) 218.33 580.95 P
1 F
-0.58 (\051, depending on how the parallel processing is applied. The space) 224.33 580.95 P
(complexity should be O\050) 99.65 566.95 T
2 F
(m) 218.57 566.95 T
1 F
(\051.) 227.23 566.95 T
0.22 (In summary, there is reason to think that the method of forming alignments which) 135.65 550.95 P
1.68 (is embodied in SP61 will not lead to running times or demands for storage which are) 99.65 536.95 P
(hopelessly impractical when the this approach is applied to realistically large examples.) 99.65 522.95 T
0 F
(7) 99.65 488.95 T
(Probabilistic reasoning, multiple alignment and information compression) 135.65 488.95 T
1 F
0.54 (What connection is there between the formation of an alignment, as described in Section) 99.65 464.95 P
-0.16 (2.1.1, and PrbRs? Using simple examples, this section tries to show informally how PrbRs) 99.65 450.95 P
1.06 (may be understood as the formation of an alignment in which some part or parts of the) 99.65 436.95 P
4.75 (patterns from Old which appear in the alignment \050single symbols, substrings or) 99.65 422.95 P
1.91 (subsequences within patterns from Old\051 are not aligned with any matching symbol or) 99.65 408.95 P
-0.03 (symbols from New. To the extent that the alignment is a \324good\325 alignment, the unmatched) 99.65 394.95 P
0.52 (symbols from Old which appear within the alignment may be interpreted as probabilistic) 99.65 380.95 P
-0.1 (inferences. As a working hypothesis, all kinds of PrbRs may be understood in these terms.) 99.65 366.95 P
1.3 ( What connection is there between the probability of any inferences which may) 135.65 350.95 P
1.54 (appear in an alignment and measures of compression for that alignment? How can the) 99.65 336.95 P
-0.03 (probability of MA inferences be calculated? Answers to these two questions are presented) 99.65 322.95 P
(at the beginning of the second of these three articles [40].) 99.65 308.95 T
0 F
(7.1) 99.65 274.95 T
(Scene analysis and pattern recognition) 135.65 274.95 T
1 F
0.59 (The idea that pattern recognition might be understood as IC has been recognised from at) 99.65 250.95 P
0.67 (least as far back as 1972 \050[38]\051. At first sight, pattern recognition and IC seem unrelated) 99.65 236.95 P
0.42 (and, at first sight, it is not obvious what either of them has to do with PrbRs. However, a) 99.65 222.95 P
(little reflection should make clear how these things are related.) 99.65 208.95 T
0.79 (Consider how we perceive a typical scene. Outdoors, for example, we may see a) 135.65 192.95 P
0.82 (tree, behind which is parked a car which is itself in front of a house. With no conscious) 99.65 178.95 P
-0.04 (effort we immediately recognise the tree, the car and the house despite the fact that part of) 99.65 164.95 P
(the car is obscured by the tree and part of the house is obscured by the tree and the car.) 99.65 150.95 T
0.2 (Not only can we recognise a whole object or pattern from a subset of its parts but,) 135.65 134.95 P
0.55 (within wide limits, it normally does not matter very much which parts we see and which) 99.65 120.95 P
1.69 (are obscured. This process of recognising objects in a scene using partial information,) 99.65 106.95 P
FMENDPAGE
%%EndPage: "31" 32
%%Page: "32" 32
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 32 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
0.6 (which is so familiar and so apparently effortless, may be interpreted as a form of MA. It) 99.65 736.95 P
(may also be interpreted as a form of PrbRs.) 99.65 722.95 T
0.54 (Figure 7 shows a an MA which represents, schematically, a side view of how the) 135.65 706.95 P
(scene described above is recognised.) 99.65 692.95 T
0 F
(Figure 7) 135.65 569.95 T
1 F
-0.64 (An alignment illustrating schematically the recognition of a house, car and tree. The) 135.65 553.95 P
0.4 (top row represents a side view or cross-section of the scene as it is projected onto) 135.65 539.95 P
0.15 (the retina of one eye. The second row \050\324) 135.65 525.95 P
4 F
0.37 (t) 328.37 525.95 P
1 F
0.15 (\325\051 represents a side view or cross-section) 335.56 525.95 P
0.29 (of a record in memory for the appearance of a tree. Rows three and four represent) 135.65 511.95 P
(similar records for a car \050\324) 135.65 497.95 T
4 F
(c) 261.2 497.95 T
1 F
(\325\051 and a house \050\324) 268.39 497.95 T
4 F
(h) 346.98 497.95 T
1 F
(\325\051.) 354.18 497.95 T
2.87 (The symbols \324) 135.65 473.95 P
4 F
6.9 (t) 210.02 473.95 P
1 F
2.87 (\325, \324) 217.22 473.95 P
4 F
6.9 (c) 234.08 473.95 P
1 F
2.87 (\325 and \324) 241.27 473.95 P
4 F
6.9 (h) 278.32 473.95 P
1 F
2.87 (\325 represent elements of the tree, car and house,) 285.52 473.95 P
0.16 (respectively. The top row represents a side view or cross-section of an image of the scene) 99.65 459.95 P
-0.45 (as it appears on the retina of one eye. The three rows below represent similar views of three) 99.65 445.95 P
(stored patterns corresponding to the three types of object.) 99.65 431.95 T
0.26 (There is, of course, much more to the recognition process than is conveyed by the) 135.65 415.95 P
1.24 (figure. But, for recognition to occur, there must be a mapping between elements of the) 99.65 401.95 P
1.62 (image and elements of the stored patterns. The alignment shown in Figure 7 seems to) 99.65 387.95 P
(capture the overall structure of that mapping.) 99.65 373.95 T
3.45 (Where does reasoning figure in this kind of partial pattern recognition? By) 135.65 357.95 P
0.27 (recognising the car and the house we infer the existence of the parts which we cannot see) 99.65 343.95 P
0.08 (but which appear in our memories of typical cars and houses, or our memory of a specific) 99.65 329.95 P
0.44 (house or car. We may, for example, infer that the handle of one door of the car is hidden) 99.65 315.95 P
0.93 (behind the tree. Likewise, we may infer that unseen parts of the house are composed of) 99.65 301.95 P
0.44 (brick or stone in accordance with our memory of what houses are built from or what one) 99.65 287.95 P
0.12 (specific house is built from. If we can see just the top half of a door, we are likely to infer) 99.65 273.95 P
-0.73 (that we would see the bottom half if the car were to be moved or if we looked behind the car.) 99.65 259.95 P
-0.22 (The inferences are probabilistic because none of them are certain. In some unlikely) 135.65 243.95 P
-0.45 (world, there might simply be gaps in the car and the house at the precise points where those) 99.65 229.95 P
0.11 (gaps are covered by the object in front. Or the part of the house which we cannot see may) 99.65 215.95 P
0.81 (be made of paper, not brick or stone - or it could conceivably be made of green cheese!) 99.65 201.95 P
0.02 (Whether something exists when it is unseen \050or otherwise perceived\051 is, of course, a long-) 99.65 187.95 P
(standing issue in philosophy.) 99.65 173.95 T
0 F
(7.2) 99.65 139.95 T
(Reasoning with rules) 135.65 139.95 T
1 F
-0.31 (The kind of partial pattern recognition discussed in the previous subsection is, perhaps, not) 99.65 115.95 P
0.61 (normally regarded as a paradigm for PrbRs. This subsection tries to show informally, by) 99.65 101.95 P
99.65 96.95 531.65 744.95 C
101.15 589.95 530.15 688.95 C
4 14 Q
0 X
0 K
(t) 289.6 647.7 T
(t) 308.8 647.7 T
(t) 327.99 647.7 T
(c) 232.02 625.28 T
(c) 251.21 625.28 T
(c) 270.41 625.28 T
(c) 289.6 625.28 T
(c) 308.8 625.28 T
(c) 327.99 625.28 T
(c) 347.19 625.28 T
(c) 366.39 625.28 T
(c) 385.58 625.28 T
(c) 404.78 625.28 T
(h) 174.43 602.85 T
(h) 193.63 602.85 T
(h) 212.82 602.85 T
(h) 232.02 602.85 T
(h) 251.21 602.85 T
(h) 270.41 602.85 T
(h) 289.6 602.85 T
(h) 308.8 602.85 T
(h) 327.99 602.85 T
(h) 347.19 602.85 T
(h) 366.39 602.85 T
(h) 385.58 602.85 T
(h) 404.78 602.85 T
(h) 423.97 602.85 T
(h) 443.17 602.85 T
(h) 462.36 602.85 T
(h) 481.56 602.85 T
(h) 500.75 602.85 T
(h) 155.24 602.85 T
(h) 136.04 602.85 T
(h) 174.43 670.13 T
(h) 193.63 670.13 T
(h) 212.82 670.13 T
(h) 423.97 670.13 T
(h) 443.17 670.13 T
(h) 462.36 670.13 T
(h) 481.56 670.13 T
(h) 500.75 670.13 T
(h) 155.24 670.13 T
(h) 136.04 670.13 T
(c) 232.02 670.13 T
(c) 251.21 670.13 T
(c) 270.41 670.13 T
(c) 347.19 670.13 T
(c) 366.39 670.13 T
(c) 385.58 670.13 T
(c) 404.78 670.13 T
(t) 289.6 670.13 T
(t) 308.8 670.13 T
(t) 327.99 670.13 T
140.04 666.13 140.04 614.13 2 L
0.5 H
2 Z
N
158.04 666.13 158.04 614.13 2 L
N
177.04 666.13 177.04 614.13 2 L
N
197.04 666.13 197.04 614.13 2 L
N
217.04 666.13 217.04 614.13 2 L
N
426.04 666.13 426.04 614.13 2 L
N
447.04 666.13 447.04 614.13 2 L
N
465.04 666.13 465.04 614.13 2 L
N
485.04 666.13 485.04 614.13 2 L
N
504.04 666.13 504.04 614.13 2 L
N
236.04 666.13 236.04 635.13 2 L
N
255.04 666.13 255.04 635.13 2 L
N
275.04 666.13 275.04 635.13 2 L
N
351.04 666.13 351.04 635.13 2 L
N
370.04 666.13 370.04 635.13 2 L
N
390.04 666.13 390.04 635.13 2 L
N
409.04 666.13 409.04 635.13 2 L
N
294.04 666.13 294.04 660.13 2 L
N
312.22 666.13 312.22 660.13 2 L
N
330.4 666.13 330.4 660.13 2 L
N
99.65 96.95 531.65 744.95 C
-8.35 24.95 603.65 816.95 C
FMENDPAGE
%%EndPage: "32" 33
%%Page: "33" 33
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 33 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
0.15 (means of a simple example, that reasoning with \324rules\325 - which is, perhaps, closer to what) 99.65 736.95 P
-0.29 (most people understand as \324reasoning\325 - might also be understood in terms of ICMAUS. In) 99.65 722.95 P
-0.44 (this context, a \324rule\325 is something which expresses a relationship between a \324condition\325 and) 99.65 708.95 P
(an associated \324action\325 or \324consequence\325 - like a rule in an expert system.) 99.65 694.95 T
-0.38 (We are all familiar with the dictum \322There is no smoke without fire\323. Although this) 135.65 678.95 P
2.23 (is often used in a metaphorical sense, it expresses, in a slightly inaccurate form, our) 99.65 664.95 P
0.55 (experience of real smoke and real fires: that \050almost\051 every fire produces smoke and that) 99.65 650.95 P
(\050almost\051 all smoke is generated by a fire.) 99.65 636.95 T
-0.39 (A simple and natural way to represent this association, \324rule of thumb\325 or regularity) 135.65 620.95 P
1.52 (about the world is by means of a simple pattern containing a symbol for smoke and a) 99.65 606.95 P
(symbol for fire, something like this: \324) 99.65 592.95 T
4 F
(fire smoke) 279.21 592.95 T
1 F
(\325 or, perhaps, \324) 351.17 592.95 T
4 F
(smoke fire) 421.44 592.95 T
1 F
(\325.) 493.4 592.95 T
-0.64 (To honour fully the principle that knowledge structures should reflect the \324raw\325 data) 135.65 576.95 P
0.75 (from which they are derived \050see Section 1.3\051, representation of the association between) 99.65 562.95 P
-0.44 (smoke and fire would need three spatial dimensions and one temporal dimension. Since we) 99.65 548.95 P
0.18 (have chosen, for the time being, to confine ourselves to one-dimensional patterns, the full) 99.65 534.95 P
-0.5 (structure of the association between smoke and fire can be represented only approximately.) 99.65 520.95 P
-0.36 (In the example patterns, above, the first version is, possibly, better than the second because) 99.65 506.95 P
-0.02 (quantities of smoke which can be seen easily first appear after the fire which creates it has) 99.65 492.95 P
0.33 (started. However, for many applications, this kind of detail is not important and the basic) 99.65 478.95 P
(fact that smoke and fire tend to occur together is what matters.) 99.65 464.95 T
0.71 (If we see some smoke, this new observation may be represented in \324New\325 with a) 135.65 448.95 P
-0.47 (pattern containing a single symbol for smoke, thus: \324) 99.65 434.95 P
4 F
-1.12 (smoke) 348.77 434.95 P
1 F
-0.47 (\325. This pattern may be matched) 384.75 434.95 P
(with \324Old\325 knowledge which includes the pattern \324) 99.65 420.95 T
4 F
(fire smoke) 341.81 420.95 T
1 F
(\325.) 413.77 420.95 T
-0.2 (Amongst the \324good\325 alignments arising from this matching process we are likely to) 135.65 404.95 P
(find this alignment:) 99.65 390.95 T
4.17 (As in the previous example of partial pattern matching \050Section 7.1\051, any) 135.65 326.95 P
0.46 (unmatched symbol in the alignment may be interpreted as an inference which the system) 99.65 312.95 P
-0.61 (has made. In this case, the existence of smoke has led to an inference that there is likely also) 99.65 298.95 P
0.78 (to be a fire in the same vicinity. In most cases, the inference will not be certain because) 99.65 284.95 P
3.13 (\324) 99.65 270.95 P
4 F
7.52 (smoke) 103.64 270.95 P
1 F
3.13 (\325 may also align with patterns relating to artificial smoke used in dramatic) 139.62 270.95 P
(productions where there is no associated fire.) 99.65 256.95 T
0.72 (This subsection has considered an extremely simple example of reasoning with a) 135.65 240.95 P
2.36 (rule. More elaborate examples are presented in [40] and [41], including examples of) 99.65 226.95 P
(\324chains\325 of reasoning in two or more steps.) 99.65 212.95 T
0 F
(8) 99.65 178.95 T
(Conclusion) 135.65 178.95 T
1 F
2.19 (This, the first of three articles introducing the idea that PrbRs may be understood as) 99.65 154.95 P
-0.57 (ICMAUS, has introduced the concept of multiple alignment as it has been developed in this) 99.65 140.95 P
-0.52 (work and has explained how IC associated with any such alignment may be calcluated. The) 99.65 126.95 P
0.21 (SP61 model for the formation of alignemnts has been described in outline and, in outline,) 99.65 112.95 P
99.65 96.95 531.65 744.95 C
102.65 338.95 528.65 386.95 C
4 12 Q
0 X
0 K
(fire) 173.65 348.95 T
(smoke) 213.23 348.95 T
(smoke) 213.23 372.95 T
229.01 368.95 229.01 356.95 2 L
0.5 H
2 Z
N
99.65 96.95 531.65 744.95 C
-8.35 24.95 603.65 816.95 C
FMENDPAGE
%%EndPage: "33" 34
%%Page: "34" 34
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 34 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(it has been shown how probabilistic inferences may be derived from alignments.) 99.65 736.95 T
-0.62 (The next article [40] starts with a description of how probabilities of inferences may) 135.65 720.95 P
0.58 (be calculated. The remainder of [40] and most of the third article [41] present a range of) 99.65 706.95 P
0.83 (examples to show how different aspects and kinds of PrbRs may be understood in these) 99.65 692.95 P
(terms.) 99.65 678.95 T
0 14 Q
(Acknowledgements) 99.65 643.62 T
1 12 Q
2.31 (I am grateful to Prof. C. S Wallace of Monash University, to Dr. Tim Porter of the) 99.65 618.95 P
-0.72 (Department of Mathematics, University of Wales, Bangor \050UWB\051 and to Mr. John Hornsby) 99.65 604.95 P
3.69 (of the School of Electronic Engineering and Computer) 99.65 590.95 P
3.69 ( Systems, UWB, for useful) 387.63 590.95 P
-0.21 (discussions of various ideas presented in this article and the two articles which accompany) 99.65 576.95 P
(it.) 99.65 562.95 T
0 14 Q
(References) 99.65 527.62 T
1 12 Q
(1) 99.65 502.95 T
2.35 (Allison, L and Wallace, C. S. \0501994\051 The posterior probability distribution of) 135.65 502.95 P
-0.23 (alignments and its application to parameter estimation of evolutionary trees) 171.65 488.95 P
-0.35 (and to optimization of multiple alignments.) 171.65 474.95 P
2 F
-0.35 (Journal of Molecular Evol) 380.45 474.95 P
1 F
-0.35 (ution) 506.99 474.95 P
(39, 418-430.) 171.65 460.95 T
(2) 99.65 444.95 T
2.97 (Allison, L, Wallace, C. S. and Yee, C. N. \0501992\051 Finite-state models in the) 135.65 444.95 P
(alignment of macromolecules.) 171.65 430.95 T
2 F
(Journal of Molecular Evol) 320.21 430.95 T
1 F
(ution, 35, 77-89.) 447.8 430.95 T
(3) 99.65 414.95 T
1.82 (Barton) 135.65 414.95 P
1.82 (G. J. \0501990\051 Protein Multiple Sequence Alignment and Flexible Pattern) 173.12 414.95 P
(Matching.) 171.65 400.95 T
2 F
(Methods in Enzymology,) 223.62 400.95 T
1 F
(183, 403-428.) 344.87 400.95 T
(4) 99.65 384.95 T
0.56 (Belloti, T. and Gammerman, A. \0501996\051 Experiments in solving analogy problems) 135.65 384.95 P
7.46 (using Minimal Length Encoding. Presented at Applied Decision) 171.65 370.95 P
-0.19 (Technologies \32595, Brunel University, April 1995 \050Proceedings of Stream 1,) 171.65 356.95 P
2 F
(Computational Learning and Probabilistic Reasoning) 171.65 342.95 T
1 F
(, pp. 209-220\051.) 430.86 342.95 T
(5) 99.65 326.95 T
1.04 (Bondarenko, A., Dung, P. M., Kowalski, R. A. and Toni, F. \0501997\051 An abstract,) 135.65 326.95 P
10.04 (argumentation-theoretic approach to default reasoning.) 171.65 312.95 P
2 F
10.04 (Artificial) 488.33 312.95 P
(Intelligence) 171.65 298.95 T
1 F
( 93\0501-2\051, 63-101.) 228.27 298.95 T
(6) 99.65 282.95 T
1.55 (Chan, S. C., Wong, A. K. C. and Chiu, D. K. Y. A \0501992) 135.65 282.95 P
2 F
1.55 (\051) 428.71 282.95 P
1 F
1.55 (Survey of Multiple) 437.26 282.95 P
0.4 (Sequence Comparison Methods.) 171.65 268.95 P
2 F
0.4 (Bulletin of Mathematical Biology) 331.42 268.95 P
1 F
0.4 (, 54) 492.87 268.95 P
2 F
0.4 ( \050) 511.26 268.95 P
1 F
0.4 (4\051,) 518.66 268.95 P
(563-598.) 171.65 254.95 T
(7) 99.65 238.95 T
1.2 (Cheeseman, P. \0501990\051 On finding the most probable model. In J. Strager and P.) 135.65 238.95 P
1.82 (Langley \050Eds.\051) 171.65 224.95 P
2 F
1.82 (Computational models of scientific discovery and theory) 249.56 224.95 P
-0.26 (formation) 171.65 210.95 P
1 F
-0.26 (, Chapter 3, Morgan Kaufmann, San Mateo, California, pp.73-95.) 218.96 210.95 P
(8) 99.65 194.95 T
1.69 (Cover, T. M. and Thomas, J. A. \0501991\051.) 135.65 194.95 P
2 F
1.69 (Elements of Information Theory) 342.68 194.95 P
1 F
1.69 (. New) 501.32 194.95 P
(York: John Wiley.) 171.65 180.95 T
(9) 99.65 164.95 T
0.97 (Cussens, J. and Hunter, A. \0501992\051 Using maximum entropy in a defeasible logic) 135.65 164.95 P
7.17 (with probabilistic semantics. In) 171.65 150.95 P
2 F
7.17 (Proceedings of the International) 353.9 150.95 P
0.53 (Conference in Information Processing and Management of Uncertainty in) 171.65 136.95 P
1.1 (Knowledge-Based Systems \050IPMU 92\051) 171.65 122.95 P
1 F
1.1 ( and in) 359.14 122.95 P
2 F
1.1 (Lecture Notes in Computer) 398.09 122.95 P
(Science) 171.65 108.95 T
1 F
(, vol. 682, pp. 43-52, Springer.) 208.28 108.95 T
FMENDPAGE
%%EndPage: "34" 35
%%Page: "35" 35
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 35 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(10) 99.65 736.95 T
0.11 (Dagun, P. and Luby, M. \0501997\051 An optimal approximation algorithm for Bayesian) 135.65 736.95 P
(inference.) 171.65 722.95 T
2 F
(Artificial Intelligence) 222.26 722.95 T
1 F
( 93\0501-2\051, 1-27.) 325.2 722.95 T
(11) 99.65 706.95 T
1.94 (Day, W. H. E. and McMorris, F. R. \0501992\051 Critical Comparison of Consensus) 135.65 706.95 P
0.7 (Methods for Molecular Sequences.) 171.65 692.95 P
2 F
0.7 (Nucleic Acids Research,) 345.3 692.95 P
1 F
0.7 (20) 467.3 692.95 P
0.7 (\050) 482.99 692.95 P
2 F
0.7 (5) 486.98 692.95 P
1 F
0.7 (\051, 1093-) 492.98 692.95 P
(1099.) 171.65 678.95 T
(12) 99.65 662.95 T
3.55 (Felsenstein, J. \0501981\051 Evolutionary trees from DNA sequences: a maximum) 135.65 662.95 P
(likelihood approach.) 171.65 648.95 T
2 F
(Journal of Molecular Evolution,) 273.24 648.95 T
1 F
(17, 368-376.) 431.48 648.95 T
(13) 99.65 632.95 T
-0.39 (Gabbay, D. M., Hogger, C. J. and Robinson, J. A. \050Eds.\051 \0501994\051) 135.65 632.95 P
2 F
-0.39 (Handbook of Logic) 439.81 632.95 P
-0.64 (in Artificial Intelligence and Logic Programming, Volume 3, Nonmonotonic) 171.65 618.95 P
(Reasoning and Uncertain Reasoning) 171.65 604.95 T
1 F
(. Oxford: Clarendon Press.) 348.54 604.95 T
(14) 99.65 588.95 T
3.9 (Gammerman, A. J. \050Ed.\051 \0501996\051.) 135.65 588.95 P
2 F
3.9 (Computational Learning and Probabilistic) 314.37 588.95 P
(Reasoning) 171.65 574.95 T
1 F
(. Chichester: John Wiley.) 222.28 574.95 T
(15) 99.65 558.95 T
0.19 (Gammerman, A. J.) 135.65 558.95 P
2 F
0.19 (\050) 230.16 558.95 P
1 F
0.19 (1991\051 The representation and manipulation of the algorithmic) 234.16 558.95 P
3.57 (probability measure for problem solving) 171.65 544.95 P
2 F
3.57 (. Annals of Mathematics and) 379.79 544.95 P
(Artificial Intelligence,) 171.65 530.95 T
1 F
(4, 281-300.) 280.58 530.95 T
(16) 99.65 514.95 T
0.23 (Ginsberg, M. L. \0501994\051 AI and non-monotonic reasoning. In D. M. Gabbay, C. H.) 135.65 514.95 P
4.02 (Hogger and J. A. Robinson \050Eds.\051,) 171.65 500.95 P
2 F
4.02 (Handbook of Logic in Artificial) 364.31 500.95 P
1.84 (Intelligence and Logic Programming, Vol. 3, Non-monotonic Reasoning) 171.65 486.95 P
(and Uncertain Reasoning) 171.65 472.95 T
1 F
(. Oxford: Clarendon Press, pp. 1-33.) 294.9 472.95 T
(17) 99.65 456.95 T
3.84 (Gr\237nwald, P. \0501997\051 The Minimum Description Length Principle and Non-) 135.65 456.95 P
-0.56 (Deductive Inference.) 171.65 442.95 P
2 F
-0.56 (Proceedings of the IJCAI Workshop on Abduction and) 274.09 442.95 P
(Induction in AI) 171.65 428.95 T
1 F
(\050editor, P. Flach\051, Nagoya, Japan.) 247.27 428.95 T
(18) 99.65 412.95 T
0.4 (Gr\237nwald, P. \0501998\051 Minimum encoding approaches for predictive modelling. To) 135.65 412.95 P
1.03 (appear in the) 171.65 398.95 P
2 F
1.03 (Proceedings of the Fourteenth International Conference on) 239.67 398.95 P
(Uncertainty in Artificial Intelligence) 171.65 384.95 T
1 F
( \050Madison, WI, 1998\051.) 347.21 384.95 T
(19) 99.65 368.95 T
1.71 (Hortvitz, E. J., Breese, J. S. and Henrion, M. \0501988\051 Decision theory in expert) 135.65 368.95 P
1.75 (systems and artificial intelligence.) 171.65 354.95 P
2 F
1.75 (International Journal of Approximate) 345.52 354.95 P
(Reasoning 2) 171.65 340.95 T
1 F
(, 247-302.) 231.28 340.95 T
(20) 99.65 324.95 T
1.12 (Kern-Isberner, G. \0501998\051 Characterising the principle of minimum cross-entropy) 135.65 324.95 P
-0.68 (within a conditional-logical framework. Artificial Intelligence 98\0501-2\051, 169-) 171.65 310.95 P
(208.) 171.65 296.95 T
(21) 99.65 280.95 T
1.75 (Kohlas, J., Haenni, R. and Mouney, P. A. \0501998\051 Model-based diagnostics and) 135.65 280.95 P
0.91 (probabilistic assumption-based reasoning.) 171.65 266.95 P
2 F
0.91 (Artificial Intelligence) 378.92 266.95 P
1 F
0.91 ( 104\0501-2\051,) 482.77 266.95 P
(71-106.) 171.65 252.95 T
(22) 99.65 236.95 T
0.63 (Li, M. and Vit\207nyi, P.) 135.65 236.95 P
2 F
0.63 (\050) 247.07 236.95 P
1 F
0.63 (1997\051) 251.07 236.95 P
2 F
0.63 ( An Introduction to Kolmogorov Complexity and Its) 279.05 236.95 P
(Applications) 171.65 222.95 T
1 F
(. Springer-Verlag, New York.) 232.29 222.95 T
(23) 99.65 206.95 T
(Lowry, R. \0501989\051.) 135.65 206.95 T
2 F
(The Architecture of Chance) 225.59 206.95 T
1 F
(. Oxford: Oxford University Press.) 358.5 206.95 T
(24) 99.65 190.95 T
-0.37 (Parsons, S. and Hunter, A. \0501998\051. A review of uncertainty handling formalisms. In) 135.65 190.95 P
2.79 (\322Applications of Uncertainty Formalisms\323,) 171.65 176.95 P
2 F
2.79 (Lecture Notes in Computer) 393.01 176.95 P
(Science) 171.65 162.95 T
1 F
(, vol. 1455.) 208.28 162.95 T
(25) 99.65 146.95 T
3.36 (Pearl, J. \0501988\051.) 135.65 146.95 P
2 F
3.36 (Probabilistic Reasoning in Intelligent Systems: Networks of) 224.99 146.95 P
(Plausible Inference) 171.65 132.95 T
1 F
(. Morgan Kaufmann, San Francisco, California.) 265.24 132.95 T
(26) 99.65 116.95 T
0.36 (Pednault, E. P. D. \0501991\051 Minimal-length encoding and inductive inference. In G.) 135.65 116.95 P
3.78 (Piatetsky-Shapiro and W. J. Frawley \050eds.\051,) 171.65 102.95 P
2 F
3.78 (Knowledge Discovery in) 406.17 102.95 P
FMENDPAGE
%%EndPage: "35" 36
%%Page: "36" 36
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 36 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
2 F
0 X
(Databases) 171.65 736.95 T
1 F
(, MIT Press, Cambridge Mass.) 222.28 736.95 T
(27) 99.65 720.95 T
2.42 (Reichert,) 135.65 720.95 P
2.42 (T. A., Cohen, D. N. and Wong, A. K. C. \0501973\051 An application of) 184.7 720.95 P
1.25 (information theory to genetic mutations and the matching of polypeptide) 171.65 706.95 P
(sequences) 171.65 692.95 T
2 F
(. Journal of Theoretical Biology,) 220.27 692.95 T
1 F
(42, 245-261.) 380.51 692.95 T
(28) 99.65 676.95 T
-0.56 (Rissanen, J. \0501978\051 Modelling by the shortest data description.) 135.65 676.95 P
2 F
-0.56 (Automatica-J,.) 433.71 676.95 P
1 F
-0.56 (IFAC) 504.33 676.95 P
(14, 465-471.) 171.65 662.95 T
(29) 99.65 646.95 T
2.05 (Roytberg, M. A. A. \0501992) 135.65 646.95 P
2 F
2.05 (\051) 268.45 646.95 P
1 F
2.05 ( Search for Common Patterns in Many Sequences,) 272.44 646.95 P
2 F
2.05 (.) 528.65 646.95 P
(Cabios,) 171.65 632.95 T
1 F
(8 \0501\051, 57-64.) 211.63 632.95 T
(30) 99.65 616.95 T
5.9 (Sankoff, D. and Kruskall, J. B. \0501983\051) 135.65 616.95 P
2 F
5.9 (Time Warps, String Edits, and) 362.8 616.95 P
3.75 (Macromolecules: the Theory and Practice of Sequence Comparisons) 171.65 602.95 P
1 F
3.75 (.) 528.65 602.95 P
(Reading, MA: Addison-Wesley.) 171.65 588.95 T
(31) 99.65 572.95 T
0.42 (Schaub, T. and Bruning, S. \0501998\051 Prolog technology for default reasoning: proof) 135.65 572.95 P
(theory and compilation techniques.) 171.65 558.95 T
2 F
(Artificial Intelligence) 343.2 558.95 T
1 F
( 106\0501\051, 1-75.) 446.14 558.95 T
(32) 99.65 542.95 T
0.83 (Schurz, G. \0501998\051 Probabilistic semantics for Degrande\325s conditional logic and a) 135.65 542.95 P
(counter example to his default logic.) 171.65 528.95 T
2 F
(Artificial Intelligence) 349.86 528.95 T
1 F
( 102\0501\051, 81-95.) 452.8 528.95 T
(33) 99.65 512.95 T
1.02 (Solomonoff, R. J. \0501964\051. A formal theory of inductive inference. Parts I and II.) 135.65 512.95 P
2 F
(Information and Control) 171.65 498.95 T
1 F
(7: 1-22 and 224-254.) 293.26 498.95 T
(34) 99.65 482.95 T
0.96 (Storer, J. A.) 135.65 482.95 P
2 F
0.96 (\050) 199.15 482.95 P
1 F
0.96 (1988\051) 203.14 482.95 P
2 F
0.96 ( Data Compression: Methods and Theory) 231.12 482.95 P
1 F
0.96 (. Computer Science) 435.46 482.95 P
(Press, Rockville, Maryland.) 171.65 468.95 T
(35) 99.65 452.95 T
1.13 (Taylor, W. R. \0501988\051 Pattern matching methods in protein sequence comparison) 135.65 452.95 P
(and structure prediction.) 171.65 438.95 T
2 F
(Protein Engineering,) 291.56 438.95 T
1 F
(2 \0502\051, 77-86.) 395.83 438.95 T
(36) 99.65 422.95 T
1.19 (van der Gaag, L. C. \0501996\051 Bayesian belief networks: odds and ends.) 135.65 422.95 P
2 F
1.19 (Computer) 483.67 422.95 P
(Journal 39) 171.65 408.95 T
1 F
(, 97-113.) 223.95 408.95 T
(37) 99.65 392.95 T
5.32 (Wallace, C. S. and Boulton, D. M. \0501968\051 An information measure for) 135.65 392.95 P
(classification.) 171.65 378.95 T
2 F
(Computer Journal,) 240.93 378.95 T
1 F
( 11 \0502\051, 185-195.) 332.22 378.95 T
(38) 99.65 362.95 T
4.21 (Watanabe, S. \0501972\051 Pattern recognition as information compression. In S.) 135.65 362.95 P
1.24 (Watanabe \050Ed.\051,) 171.65 348.95 P
2 F
1.24 (Frontiers of Pattern Recognition) 255.38 348.95 P
1 F
1.24 (, New York: Academic) 416.69 348.95 P
(Press.) 171.65 334.95 T
(39) 99.65 318.95 T
0.49 (Wolff, J. G. \050submitted, a\051 Probabilistic reasoning as information compression by) 135.65 318.95 P
(multiple alignment, unification and search \050I\051: introduction \050This article\051.) 171.65 304.95 T
(40) 99.65 288.95 T
0.18 (Wolff, J. G. \050submitted, b\051 Probabilistic reasoning as ICMAUS \050II\051: calculation of) 135.65 288.95 P
0.18 (probabilities, best-match pattern recognition and information retrieval, and) 171.65 274.95 P
(reasoning with networks, trees and rules.) 171.65 260.95 T
(41) 99.65 244.95 T
0.49 (Wolff, J. G. \050submitted, c\051 Probabilistic reasoning as ICMAUS \050III\051: hypothetical) 135.65 244.95 P
1.71 (reasoning, geometric analogies, default values, nonmonotonic reasoning,) 171.65 230.95 P
(and modelling \324explaining away\325.) 171.65 216.95 T
(42) 99.65 200.95 T
3.56 (Wolff, J. G. \050submitted, d\051 Parsing as information compression by multiple) 135.65 200.95 P
-0.37 (alignment, unification and search. Two articles on which this paper is based) 171.65 186.95 P
-0.37 (\050\322Parsing ... SP52\323 and \322Parsing ... Examples\323\051 may be obtained from http:/) 171.65 172.95 P
(/www.sees.bangor.ac.uk/~gerry/sp_summary.html.) 171.65 158.95 T
(43) 99.65 142.95 T
1.46 (Wolff, J. G. \0501997\051 Causality, statistical learning and multiple alignment. Paper) 135.65 142.95 P
1.82 (presented at the UNICOM Seminar and Tutorial on Causal Models and) 171.65 128.95 P
(Statistical Learning, London, March 1997.) 171.65 114.95 T
FMENDPAGE
%%EndPage: "36" 37
%%Page: "37" 37
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 37 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(44) 99.65 736.95 T
1.96 (Wolff, J. G. \0501996a\051 Logic as information compression by multiple alignment,) 135.65 736.95 P
2.59 (unification and search. SEECS Report, October 1996. A copy may be) 171.65 722.95 P
(obtained from http://www.sees.bangor.ac.uk/~gerry/sp_summary.html.) 171.65 708.95 T
(45) 99.65 692.95 T
0.81 (Wolff, J. G. \0501996b\051 Information compression by multiple alignment, unification) 135.65 692.95 P
1.29 (and search as a universal theory of computing. SEECS Report, February) 171.65 678.95 P
0.35 (1996. A copy may be obtained from http://www.sees.bangor.ac.uk/~gerry/) 171.65 664.95 P
(sp_summary.html.) 171.65 650.95 T
(46) 99.65 634.95 T
3.13 (Wolff, J. G. \0501996c\051 Learning and reasoning as information compression by) 135.65 634.95 P
3.65 (multiple alignment, unification and search. In: A. Gammerman \050ed.\051,) 171.65 620.95 P
2 F
0.86 (Computational Learning and Probabilistic Reasoning) 171.65 606.95 P
1 F
0.86 (, Wiley, Chichester,) 434.32 606.95 P
5.77 (pp. 67-83. An earlier version was presented at Applied Decision) 171.65 592.95 P
-0.19 (Technologies \32595, Brunel University, April 1995 \050Proceedings of Stream 1,) 171.65 578.95 P
2 F
(Computational Learning and Probabilistic Reasoning) 171.65 564.95 T
1 F
(, pp. 223-236\051.) 430.86 564.95 T
(47) 99.65 548.95 T
-0.08 (Wolff, J. G. \0501995a\051 Computing as compression: an overview of the SP theory and) 135.65 548.95 P
(system.) 171.65 534.95 T
2 F
(New Generation Computing) 210.96 534.95 T
1 F
(, 13, 187-214.) 346.22 534.95 T
(48) 99.65 518.95 T
5.32 (Wolff, J. G. \0501995b\051 Computing as compression: SP20.) 135.65 518.95 P
2 F
5.32 (New Generation) 447.38 518.95 P
(Computing) 171.65 504.95 T
1 F
(, 13, 215-241.) 224.96 504.95 T
(49) 99.65 488.95 T
1.06 (Wolff, J. G. \0501995c\051 Multiple alignment and computing. SEECS Report, August) 135.65 488.95 P
0.35 (1995. A copy may be obtained from http://www.sees.bangor.ac.uk/~gerry/) 171.65 474.95 P
(sp_summary.html.) 171.65 460.95 T
(50) 99.65 444.95 T
0.94 (Wolff, J. G. \0501994a\051 A scaleable technique for best-match retrieval of sequential) 135.65 444.95 P
1.03 (information using metrics-guided search.) 171.65 430.95 P
2 F
1.03 (Journal of Information Science,) 375.98 430.95 P
1 F
(20 \0501\051, 16-28.) 171.65 416.95 T
(51) 99.65 400.95 T
1.27 (Wolff, J. G. \0501994c\051 Towards a new concept of software) 135.65 400.95 P
2 F
1.27 (. Software Engineering) 418.17 400.95 P
(Journal) 171.65 386.95 T
1 F
(, 9) 208.96 386.95 T
2 F
(\0501\051) 223.95 386.95 T
1 F
(, 27-38.) 237.94 386.95 T
(52) 99.65 370.95 T
3.88 (Wolff, J. G. \0501993\051 Computing, cognition and information compression.) 135.65 370.95 P
2 F
3.88 (AI) 520.33 370.95 P
(Communications) 171.65 356.95 T
1 F
(, 6 \0502\051, 107-127.) 252.94 356.95 T
(53) 99.65 340.95 T
-0.57 (Wolff, J. G.) 135.65 340.95 P
2 F
-0.57 (\050) 193.91 340.95 P
1 F
-0.57 (1991) 197.9 340.95 P
2 F
-0.57 (\051 Towards a Theory of Cognition and Computing) 221.89 340.95 P
1 F
-0.57 (. Ellis Horwood,) 453.49 340.95 P
(Chichester.) 171.65 326.95 T
(54) 99.65 310.95 T
2.99 (Wolff, J. G. \0501988\051 Learning syntax and meanings through optimization and) 135.65 310.95 P
0.69 (distributional analysis. In Y. Levy, I. M. Schlesinger and M. D. S. Braine) 171.65 296.95 P
4.03 (\050eds.\051,) 171.65 282.95 P
2 F
4.03 (Categories and Processes in Language Acquisition) 208.65 282.95 P
1 F
4.03 (. Lawrence) 474.34 282.95 P
(Erlbaum, Hillsdale, NJ. Reprinted in Chapter 2 of Wolff \0501991\051.) 171.65 268.95 T
(55) 99.65 252.95 T
1.59 (Wolff, J. G. \0501982\051 Language acquisition, data compression and generalization.) 135.65 252.95 P
2 F
1.39 (Language & Communication) 171.65 238.95 P
1 F
1.39 (, 2, 57-89. Reprinted in Chapter 3 of Wolff) 314.35 238.95 P
(\0501991\051.) 171.65 224.95 T
0 14 Q
(A1) 99.65 189.62 T
(APPENDIX: DEFINITIONS OF TERMS) 135.65 189.62 T
0 12 Q
(Definition 1) 99.65 164.95 T
1 F
-0.29 (A) 135.65 145.95 P
0 F
-0.29 (symbol) 147.01 145.95 P
1 F
-0.29 ( is some kind of mark which can be discriminated from other symbols. In) 183.67 145.95 P
1.57 (the context of pattern matching, a symbol is the smallest unit which can participate in) 99.65 131.95 P
0.46 (matching: a symbol can be compared \050matched\051 only with another single symbol and the) 99.65 117.95 P
-0.14 (result of matching is either that the two symbols are the same or that they are different. No) 99.65 103.95 P
FMENDPAGE
%%EndPage: "37" 38
%%Page: "38" 38
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 38 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(other result is permitted.) 99.65 736.95 T
-0.53 (An important feature of the concept of a) 135.65 720.95 P
2 F
-0.53 (symbol) 327.23 720.95 P
1 F
-0.53 (, as it is used in this research, is that,) 361.21 720.95 P
0.93 (with respect to the way it behaves within the system, it has) 99.65 706.95 P
3 F
0.93 (no hidden meaning) 395.64 706.95 P
1 F
0.93 (. In this) 493.47 706.95 P
0.06 (research, a symbol is) 99.65 692.95 P
2 F
0.06 (nothing but) 203.49 692.95 P
1 F
0.06 ( a primitive mark which can be discriminated in a yes/no) 258.53 692.95 P
2.28 (manner from other symbols. There are no symbols like the symbols in an arithmetic) 99.65 678.95 P
0.74 (function \050e.g., \3246\325, \32422\325, \324+\325, \324-\325, \324) 99.65 664.95 P
5 F
0.74 (\264) 263.06 664.95 P
1 F
0.74 (\325, \324/\325, \324\050\324, \324\051\325 etc\051, each of which has a meaning in the) 269.64 664.95 P
(system which is not directly visible.) 99.65 650.95 T
1 10 Q
(18) 272.22 655.75 T
1 12 Q
0.17 (Any symbol in the present scheme may have a meanings but it must take the form) 135.65 634.95 P
0.09 (of one or more other symbols which are associated with the given symbol and are explicit) 99.65 620.95 P
(and visible within the structure of symbols and patterns.) 99.65 606.95 T
0.42 (Although symbols in the proposed framework have no hidden meaning having an) 135.65 590.95 P
-0.14 (effect on how the symbol behaves in the system, they can and often do have a meaning for) 99.65 576.95 P
(the human reader.) 99.65 562.95 T
1.46 (In SP6, a symbol is any string of characters \050not including the space character\051) 135.65 546.95 P
(which is bounded at each end by spaces.) 99.65 532.95 T
0 F
(Related concepts) 99.65 508.95 T
1 F
0.93 (If two symbols match, we say that they belong to the same) 135.65 489.95 P
0 F
0.93 (symbol type) 430.93 489.95 P
1 F
0.93 (. In any) 493.49 489.95 P
-0.12 (system which contains symbols, we normally recognise an) 99.65 475.95 P
0 F
-0.12 (alphabet) 382.86 475.95 P
1 F
-0.12 ( of symbol types such) 427.51 475.95 P
0.86 (that every symbol in the system belongs in one and only one of the symbol types in the) 99.65 461.95 P
(alphabet, and every symbol type is represented at least once in the system.) 99.65 447.95 T
0.94 (A positive match between two symbols is termed a) 135.65 431.95 P
0 F
0.94 (hit) 391.64 431.95 P
1 F
0.94 (. One or more unmatched) 405.64 431.95 P
(symbols within one pattern between two hits is a) 99.65 417.95 T
0 F
(gap) 336.51 417.95 T
1 F
(.) 355.17 417.95 T
0 F
(Definition 2) 99.65 393.95 T
1 F
1.59 (A) 135.65 374.95 P
0 F
1.59 (pattern) 148.9 374.95 P
1 F
1.59 ( is an array of symbols in one, two or more dimensions. For present) 186.87 374.95 P
-0.03 (purposes, one dimensional patterns \050) 99.65 360.95 P
0 F
-0.03 (sequences) 275.08 360.95 P
1 F
-0.03 ( or) 325.71 360.95 P
0 F
-0.03 (strings) 341.64 360.95 P
1 F
-0.03 ( of symbols\051 will be the focus of) 376.29 360.95 P
(attention.) 99.65 346.95 T
2 F
0.25 (The meaning of the term pattern includes the meanings of the terms substring and) 135.65 330.95 P
(subsequence \050Definition 3 and Definition 4, below\051) 99.65 316.95 T
1 F
(.) 345.17 316.95 T
0 F
(Comment) 99.65 292.95 T
1 F
0.02 (Notwithstanding the definition of) 135.65 273.95 P
2 F
0.02 (symbol) 298.98 273.95 P
1 F
0.02 (, above, the start and end of every pattern) 332.96 273.95 P
0.78 (is marked for the system by a character whose sole purpose is to mark the left and right) 99.65 259.95 P
-0.49 (boundaries of patterns. These characters - which may be left and right brackets \050\324\050\325 and \324\051\325\051,) 99.65 245.95 P
-0.51 (single quotes or non-printing end-of-line characters - are not \324symbols\325 in the sense defined) 99.65 231.95 P
-0.1 (above. They are \324meta symbols\325, which are necessary for any system like the SP61 model,) 99.65 217.95 P
(if New and Old are to be divided into discrete patterns.) 99.65 203.95 T
0 F
(Definition 3) 99.65 179.95 T
1 F
0.64 (A) 135.65 160.95 P
0 F
0.64 (substring) 147.94 160.95 P
1 F
0.64 ( is a sequence of symbols of length) 195.93 160.95 P
2 F
0.64 (n) 372.26 160.95 P
1 F
0.64 ( within a sequence of length) 378.26 160.95 P
2 F
0.64 (m) 519.99 160.95 P
1 F
0.64 (,) 528.65 160.95 P
0.14 (where) 99.65 146.95 P
2 F
0.14 (n) 132.08 146.95 P
5 F
0.14 (\243) 141.21 146.95 P
2 F
0.14 (m) 150.93 146.95 P
1 F
0.14 ( and where the constituent symbols in the substring are contiguous within the) 159.59 146.95 P
99.65 119.95 531.65 134.93 C
99.65 119.95 531.65 134.93 R
7 X
0 K
V
108.65 132.91 252.65 132.91 2 L
V
0.5 H
2 Z
0 X
N
-8.35 24.95 603.65 816.95 C
1 12 Q
0 X
0 K
(18. But see the comment with the definition of) 99.65 111.95 T
2 F
(pattern) 326.18 111.95 T
1 F
(, below.) 360.83 111.95 T
FMENDPAGE
%%EndPage: "38" 39
%%Page: "39" 39
595.3 841.9 0 FMBEGINPAGE
99.65 771.95 531.65 789.95 R
7 X
0 K
V
1 12 Q
0 X
(- 39 -) 302.66 781.95 T
99.65 96.95 531.65 744.95 R
7 X
V
0 X
(sequence which contains the substring.) 99.65 736.95 T
0 F
(Definition 4) 99.65 712.95 T
1 F
-0.43 (A) 135.65 693.95 P
0 F
-0.43 (subsequence) 146.88 693.95 P
1 F
-0.43 ( is a sequence of symbols of length) 210.85 693.95 P
2 F
-0.43 (n) 378.65 693.95 P
1 F
-0.43 ( within a sequence of length) 384.65 693.95 P
2 F
-0.43 (m) 519.99 693.95 P
1 F
-0.43 (,) 528.65 693.95 P
-0.13 (where) 99.65 679.95 P
2 F
-0.13 (n) 131.82 679.95 P
5 F
-0.13 (\243) 140.68 679.95 P
2 F
-0.13 (m) 150.13 679.95 P
1 F
-0.13 ( and where the constituent symbols in the subsequence may not be contiguous) 158.79 679.95 P
-0.57 (within the sequence which contains the subsequence. The set of all subsequences of a given) 99.65 665.95 P
(sequence includes all the substrings of that sequence.) 99.65 651.95 T
FMENDPAGE
%%EndPage: "39" 40
%%Trailer
%%BoundingBox: 0 0 595.3 841.9
%%Pages: 39 1
%%DocumentFonts: Times-Bold
%%+ Times-Roman
%%+ Times-Italic
%%+ Times-BoldItalic
%%+ Courier
%%+ Symbol
%%+ Helvetica