%!PS-Adobe-3.0
%%Title: (98 LREC)
%%Creator: (Microsoft Word: PSPrinter 8.0b2c2)
%%CreationDate: (16:30 1998, 8 mayo )
%%For: (eneko agirre)
%%Pages: 6
%%DocumentFonts: Times-Bold Times-Roman Times-Italic Courier Symbol
%%DocumentNeededFonts: Times-Bold Times-Roman Times-Italic Courier Symbol
%%DocumentSuppliedFonts:
%%DocumentData: Clean7Bit
%%PageOrder: Ascend
%%Orientation: Portrait
%ADO_PaperArea: -30 -28 811 566
%ADO_ImageableArea: 0 0 781 538
%%EndComments
% Create the driver's private dictionary md and push it on the dictionary
% stack; every def that follows is stored in md until the matching
% "end % md" at the bottom of the prolog.
/md 127 dict def md begin
% On interpreters that provide currentpacking: remember the current
% array-packing mode in sc_oldpacking and switch packing on.
/currentpacking where {pop /sc_oldpacking currentpacking def true setpacking}if
%%BeginFile: adobe_psp_errorhandler
%%Copyright: Copyright 1985-1992 Adobe Systems Incorporated. All Rights Reserved.
% Save the current array-packing mode in oldpack and turn packing off for
% the error-handler section (only when the interpreter offers both
% currentpacking and setpacking).
% NOTE(review): presumably needed because the handleerror procedure is
% later modified with "put", which a packed array would not allow - confirm.
/currentpacking where {
  pop
  /oldpack currentpacking def
  /setpacking where { pop false setpacking } if
} if
% $brkpage: private dictionary holding the helpers that paint the on-page
% error report.  It is opened here and closed by the "end" below.
/$brkpage 64 dict def $brkpage begin
% any prnt -
% Converts a non-string operand to a string (=string cvs), paints a white
% box of width 6*length and height 10 starting 2 units below the current
% point, then shows the string in black at the original current point.
/prnt
{dup type/stringtype ne{=string cvs}if dup length 6 mul/tx exch def/ty 10 def
currentpoint/toy exch def/tox exch def 1 setgray newpath
tox toy 2 sub moveto 0 ty rlineto tx 0 rlineto 0 ty neg rlineto
closepath fill tox toy moveto 0 setgray show}bind def
% nl: new line - move to x = lmargin at the current y, then 10 units down.
/nl{currentpoint exch pop lmargin exch moveto 0 -10 rmoveto}def
% any == -   reset the column counter cp, print the object by type, new line.
/=={/cp 0 def typeprint nl}def
% any typeprint -   executes the object's type name (integertype, arraytype,
% ...), which selects the handler of the same name defined below.
/typeprint{dup type exec}readonly def
/lmargin 72 def
/rmargin 72 def
% string tprint -
% Starts a new line when cp plus the string length would exceed rmargin,
% then adds the length to cp and prints the string with prnt.
/tprint
{dup length cp add rmargin gt{nl/cp 0 def}if
dup length cp add/cp exch def prnt}readonly def
% any cvsprint -   print the cvs text form of the object followed by a space.
/cvsprint{=string cvs tprint( )tprint}readonly def
% Per-type handlers reached through typeprint.  Simple values are printed
% via cvsprint; opaque objects are replaced by a fixed placeholder string.
/integertype{cvsprint}readonly def
/realtype{cvsprint}readonly def
/booleantype{cvsprint}readonly def
/operatortype{(--)tprint =string cvs tprint(-- )tprint}readonly def
/marktype{pop(-mark- )tprint}readonly def
/dicttype{pop(-dictionary- )tprint}readonly def
/nulltype{pop(-null- )tprint}readonly def
/filetype{pop(-filestream- )tprint}readonly def
/savetype{pop(-savelevel- )tprint}readonly def
/fonttype{pop(-fontid- )tprint}readonly def
% Literal names get a leading "/"; executable names are printed bare.
/nametype{dup xcheck not{(/)tprint}if cvsprint}readonly def
% Readable strings are printed in parentheses, unreadable ones as -string-.
/stringtype
{dup rcheck{(\()tprint tprint(\))tprint}{pop(-string- )tprint}ifelse
}readonly def
% Readable arrays are printed element by element through typeprint, wrapped
% in {} when executable and [] otherwise; unreadable ones as -array-.
/arraytype
{dup rcheck{dup xcheck
{({)tprint{typeprint}forall(})tprint}
{([)tprint{typeprint}forall(])tprint}ifelse}{pop(-array- )tprint}ifelse
}readonly def
% Same treatment for packed arrays.
/packedarraytype
{dup rcheck{dup xcheck
{({)tprint{typeprint}forall(})tprint}
{([)tprint{typeprint}forall(])tprint}ifelse}{pop(-packedarray- )tprint}ifelse
}readonly def
% Font used for the error report: Courier scaled to 10 units.
/courier/Courier findfont 10 scalefont def
end
% Install a replacement errordict /handleerror.
% When $error's newerror flag is set, the handler clears the flag, resets
% the graphics state (grestoreall only when the save level reported by
% vmstatus is non-zero, then initgraphics), and paints a report starting at
% (lmargin, 720): the error name, the offending command and, if $error
% holds an ostack entry, every element of that saved operand stack.
% It then executes systemdict's showpage and writes a
% "%%[ Error: ...; OffendingCommand: ... ]%%" line to standard output.
errordict/handleerror
{systemdict begin $error begin $brkpage begin newerror
{/newerror false store
vmstatus pop pop 0 ne{grestoreall}if initgraphics courier setfont
lmargin 720 moveto(ERROR: )prnt errorname prnt
nl(OFFENDING COMMAND: )prnt/command load prnt
$error/ostack
known{nl nl(STACK:)prnt nl nl $error/ostack get aload length{==}repeat}if
systemdict/showpage get exec(%%[ Error: )print
errorname =print(; OffendingCommand: )print/command
load =print( ]%%)= flush}if end end end}
% Elements 0 and 4 of the procedure (the names systemdict and $brkpage) are
% overwritten with the dictionary objects themselves before the procedure
% is bound, made read-only and stored in errordict.  The indices depend on
% the exact token order of the first line of the procedure body.
dup 0 systemdict put dup 4 $brkpage put bind readonly put
% End of the error-handler section: put back the array-packing mode that
% was saved in oldpack (only when the interpreter offers both
% currentpacking and setpacking).
/currentpacking where {
  pop
  /setpacking where { pop oldpack setpacking } if
} if
%%EndFile
%%BeginFile: adobe_psp_basic
%%Copyright: Copyright 1990-1992 Adobe Systems Incorporated. All Rights Reserved.
% Definition shorthands.
%   /key proc  bd    bind the procedure, then define it
%   value /key xd    define key as value (operands swapped, uses def)
%   value /key xs    same, but with store instead of def
%   /new /old  ld    define new as the current value of old
%   /key       Z     define key as 0
/bd { bind def } bind def
/xd { exch def } bd
/xs { exch store } bd
/ld { load def } bd
/Z  { 0 def } bd

% Short aliases for frequently used operators and constants, one explicit
% alias per line.
/T  /true         ld
/F  /false        ld
/l  /lineto       ld
/lw /setlinewidth ld
/m  /moveto       ld
/rl /rlineto      ld
/rm /rmoveto      ld
/c  /curveto      ld
/t  /translate    ld
/C  /closepath    ld
/gS /gsave        ld
/gR /grestore     ld
/np /newpath      ld
% Scratch matrix plus a few driver settings.
% NOTE(review): the meaning of av, por and normland is not visible in this
% part of the file - confirm before relying on them.
/$m       matrix def
/av       80     def
/por      true   def
/normland false  def

% Empty begin/end hooks for the variant that takes no VM snapshot.
/psb-nosave { } bd
/pse-nosave { } bd

% psb stores a VM snapshot in us; pse restores that snapshot.
/us  0 def
/psb { /us save store } bd
/pse { us restore } bd
% level2: true when the interpreter defines languagelevel and reports a
% value of 2 or more, false otherwise.
/level2 false def
/languagelevel where { pop /level2 languagelevel 2 ge def } if
% dictcount mark proc featurecleanup -
% Runs a device-feature procedure so that a failure cannot disturb the job.
% The caller pushes countdictstack, a mark and the procedure (see the
% feature blocks in the setup section of this file).
/featurecleanup
{
% Execute the procedure, catching any error it raises.
stopped
% Discard the stopped result and anything the procedure left, down to the mark.
cleartomark
% Current dictionary-stack depth minus the depth recorded by the caller.
countdictstack exch sub dup 0 gt
{
% Pop every dictionary the procedure left open.
{end}repeat
}{
pop
}ifelse
}bd
% Conditional "do not keep" bracket.
%   bool startnoload   when bool is true, take a VM snapshot into noload
%   bool endnoload     when bool is true, restore that snapshot
% Definitions made between a matching pair are therefore discarded whenever
% the boolean is true.
/noload 0 def
/startnoload { { /noload save store } if } bd
/endnoload   { { noload restore } if } bd
% Two alternative definitions of setjob and setcopies follow.  Each pair is
% bracketed by startnoload/endnoload, so the pair that does not match the
% interpreter's language level is discarded by the restore.

% Variant discarded when level2 is true:
level2 startnoload
% string setjob -   store the string as jobname in statusdict.
/setjob
{
statusdict/jobname 3 -1 roll put
}bd
% int setcopies -   store the count as #copies in userdict.
/setcopies
{
userdict/#copies 3 -1 roll put
}bd
% Variant discarded when level2 is false:
level2 endnoload level2 not startnoload
% string setjob -   pass a one-entry dictionary (JobName) to setuserparams.
/setjob
{
1 dict begin/JobName xd currentdict end setuserparams
}bd
% int setcopies -   pass a one-entry dictionary (NumCopies) to setpagedevice.
/setcopies
{
1 dict begin/NumCopies xd currentdict end setpagedevice
}bd
level2 not endnoload
% Per-page state, given real values later in the job:
%   pm  VM snapshot taken at the start of each page
%   mT  matrix concatenated onto the CTM by initializepage
%   sD  placeholder, replaced by a dictionary in the setup section
/pm 0 def
/mT 0 def
/sD 0 def

% Start of page: take a VM snapshot into pm, then apply mT.
/initializepage { /pm save store  mT concat } bd

% End of page: restore the snapshot and emit the page.  Defined with plain
% def (not bd), so restore and showpage are looked up by name each time
% endp runs.
/endp { pm restore showpage } def

/$c /DeviceRGB def
% x y width height rC -
% Clip to a rectangle.  Uses the interpreter's rectclip when it exists;
% otherwise a bound fallback builds the rectangle path, clips to it and
% clears the path.
/rC
/rectclip where
{ pop /rectclip load }
{
  {
    np
    4 2 roll m          % w h        - move to (x, y)
    1 index 0 rl        % w h        - right by w
    0 exch rl           % w          - up by h
    neg 0 rl            %            - left by w
    C
    clip np
  } bind
}
ifelse
def
% x y width height rF -
% Fill a rectangle.  Uses the interpreter's rectfill when it exists;
% otherwise a bound fallback fills the rectangle path inside a
% gsave/grestore pair.
/rF
/rectfill where
{ pop /rectfill load }
{
  {
    gS
    np
    4 2 roll m          % w h        - move to (x, y)
    1 index 0 rl        % w h        - right by w
    0 exch rl           % w          - up by h
    neg 0 rl            %            - left by w
    fill
    gR
  } bind
}
ifelse
def
% x y width height rS -
% Stroke a rectangle outline.  Uses the interpreter's rectstroke when it
% exists; otherwise a bound fallback strokes the closed rectangle path
% inside a gsave/grestore pair.
/rS
/rectstroke where
{ pop /rectstroke load }
{
  {
    gS
    np
    4 2 roll m          % w h        - move to (x, y)
    1 index 0 rl        % w h        - right by w
    0 exch rl           % w          - up by h
    neg 0 rl            %            - left by w
    C
    stroke
    gR
  } bind
}
ifelse
def
%%EndFile
%%BeginFile: adobe_psp_colorspace_level1
%%Copyright: Copyright 1991-1992 Adobe Systems Incorporated. All Rights Reserved.
% Colour shorthands: G is setgray, K is setrgbcolor.
/G /setgray     load def
/K /setrgbcolor load def
%%EndFile
%%BeginFile: adobe_psp_basic_text
%%Copyright: Copyright 1990-1992 Adobe Systems Incorporated. All Rights Reserved.
% Text-showing shorthands used by the page descriptions.  All vertical
% adjustments are fixed at 0.0; "space" below means character code 32.
%
%   string            S   show
%   ax string         A   ashow       add ax after every character
%   cx string         R   widthshow   add cx after every space
%   cx char string    W   widthshow   add cx after every occurrence of char
%   cx ax string      J   awidthshow  cx after every space, ax after every character
%   cx char ax string V   awidthshow  cx after every char, ax after every character
/S /show ld
/A { 0.0 exch ashow } bd
/R { 0.0 exch 32 exch widthshow } bd
/W { 0.0 3 1 roll widthshow } bd
/J { 0.0 32 4 2 roll 0.0 exch awidthshow } bd
/V { 0.0 4 1 roll 0.0 exch awidthshow } bd
% fc: one-shot low-memory warning.
% While fcflg is true, compare the free VM reported by vmstatus
% (maximum minus used) with 50000; when less remains, print a warning on
% standard output and clear fcflg so the warning is not repeated.
/fcflg true def
/fc
{
  fcflg
  {
    vmstatus exch sub 50000 lt
    {
      (%%[ Warning: Running out of memory ]%%\r)print flush
      /fcflg false store
    } if
    pop                 % discard the save level left over from vmstatus
  } if
} bd
% $f: matrix that negates the y axis; the re-encoding procedures in this
% file apply it to fonts with makefont.
/$f[1 0 0 -1 0 0]def
% MacEncoding: a 256-element copy of StandardEncoding, modified below.
/MacEncoding StandardEncoding 256 array copy def
% Codes 39 and 96 become quotesingle and grave.
MacEncoding 39/quotesingle put
MacEncoding 96/grave put
% The 128 glyph names that follow are pushed on the operand stack in code
% order and then stored into positions 128..255 by the astore at the end.
/Adieresis/Aring/Ccedilla/Eacute/Ntilde/Odieresis/Udieresis/aacute
/agrave/acircumflex/adieresis/atilde/aring/ccedilla/eacute/egrave
/ecircumflex/edieresis/iacute/igrave/icircumflex/idieresis/ntilde/oacute
/ograve/ocircumflex/odieresis/otilde/uacute/ugrave/ucircumflex/udieresis
/dagger/degree/cent/sterling/section/bullet/paragraph/germandbls
/registered/copyright/trademark/acute/dieresis/notequal/AE/Oslash
/infinity/plusminus/lessequal/greaterequal/yen/mu/partialdiff/summation
/product/pi/integral/ordfeminine/ordmasculine/Omega/ae/oslash
/questiondown/exclamdown/logicalnot/radical/florin/approxequal/Delta/guillemotleft
/guillemotright/ellipsis/space/Agrave/Atilde/Otilde/OE/oe
/endash/emdash/quotedblleft/quotedblright/quoteleft/quoteright/divide/lozenge
/ydieresis/Ydieresis/fraction/currency/guilsinglleft/guilsinglright/fi/fl
/daggerdbl/periodcentered/quotesinglbase/quotedblbase/perthousand
/Acircumflex/Ecircumflex/Aacute/Edieresis/Egrave/Iacute/Icircumflex/Idieresis/Igrave
/Oacute/Ocircumflex/apple/Ograve/Uacute/Ucircumflex/Ugrave/dotlessi/circumflex/tilde
/macron/breve/dotaccent/ring/cedilla/hungarumlaut/ogonek/caron
% Store the 128 names into the upper half of MacEncoding.
MacEncoding 128 128 getinterval astore pop
% fontname copyfontdict -
% Finds the font, makes a new dictionary of the same size holding a copy of
% its entries, and leaves that new dictionary open on the dictionary stack
% (the caller is expected to "end" it).  Two variants follow; the
% startnoload/endnoload brackets discard the one that does not match the
% interpreter's language level.

% Variant discarded when level2 is true: copy entry by entry, skipping FID.
level2 startnoload
/copyfontdict
{
findfont dup length dict
begin
{
1 index/FID ne{def}{pop pop}ifelse
}forall
}bd
% Variant discarded when level2 is false: copy the whole dictionary at once.
level2 endnoload level2 not startnoload
/copyfontdict
{
findfont dup length dict
copy
begin
}bd
level2 not endnoload
% Default name under which re-encoded fonts are registered, unless md
% already contains a fontname entry.
md /fontname known not { /fontname /customfont def } if

% /key fontname MacReEncode -
% Copies the named font, replaces its Encoding with MacEncoding, registers
% the copy with definefont under the name held in fontname, applies the $f
% matrix with makefont and defines the result as key.
/MacReEncode
{
  copyfontdict                  % new font dictionary is now open
  /Encoding MacEncoding def
  fontname currentdict end      % /key name dict
  definefont                    % /key font
  $f makefont
  def
} bd
% /key fontname BeginSpecialReEncode  /key enc enc
% Copies the named font, gives the copy its own 256-element Encoding array
% and leaves that array on the stack twice, ready for a run of
% "code /glyph pd" entries.
/BeginSpecialReEncode
{
  copyfontdict                  % new font dictionary is now open
  /Encoding Encoding 256 array copy def
  Encoding dup
} bd

% enc enc code /glyph pd  enc enc
% Stores one encoding entry and keeps two references to the array.
/pd { put dup } bd

% /key enc enc EndSpecialReEncode -
% Drops the two array references, registers the open dictionary with
% definefont under the name held in fontname, applies the $f matrix with
% makefont and defines the result as key.
/EndSpecialReEncode
{
  pop pop
  fontname currentdict end      % /key name dict
  definefont                    % /key font
  $f makefont
  def
} bd
% /key font size scf -        define key as font scaled to size
/scf { scalefont def } bd

% font sx sy scf-non -        build a scaling matrix in $m, apply it with
%                             makefont and make the result the current font
/scf-non { $m scale makefont setfont } bd

% value fz -                  store value in ps
/ps 0 def
/fz { /ps xs } bd

% Font shorthands: sf is setfont, cF is currentfont.
/sf /setfont     ld
/cF /currentfont ld
% mbf: blended-font helper.
% When the interpreter defines makeblendedfont, it is called and the result
% is registered as /ABlend with definefont; otherwise the top operand is
% dropped.  In both cases the procedure finishes with def.
% NOTE(review): the expected operands are not visible from any call in this
% part of the file - confirm against a caller before changing.
/mbf
{
  /makeblendedfont where
  { pop makeblendedfont /ABlend exch definefont }
  { pop }
  ifelse
  def
} def
%%EndFile
% End of the prolog definitions: put back the array-packing mode that was
% saved in sc_oldpacking when md was opened, then pop md off the
% dictionary stack.
/currentpacking where {pop sc_oldpacking setpacking}if
end % md
%%EndProlog
%%BeginSetup
% Re-open the driver dictionary for the rest of the job.
md begin
% Device feature 1: turn manual feed off.  The dictionary-stack depth, a
% mark and the feature procedure are pushed for featurecleanup, which runs
% the procedure under "stopped" and tidies up afterwards.
countdictstack
[
{%stopped
%%BeginFeature: *ManualFeed False
level2 {1 dict dup /ManualFeed false put setpagedevice}{statusdict begin /manualfeed false store end} ifelse
%%EndFeature
}featurecleanup
% Device feature 2: input slot selection.  The feature body is empty in
% this file, so the procedure does nothing.
countdictstack
[
{%stopped
%%BeginFeature: *InputSlot Upper
%%EndFeature
}featurecleanup
% Set the job name.
(eneko agirre)setjob
% Page matrix concatenated by initializepage: negates the y axis and
% translates by (28, 811).
/mT [1 0 0 -1 28 811] def
/sD 16 dict def
% Set the wait timeout to 300: through setuserparams (WaitTimeout) when
% level2 is true, otherwise by storing waittimeout in statusdict.
300 level2{1 dict dup/WaitTimeout 4 -1 roll put setuserparams}{statusdict/waittimeout 3 -1 roll put}ifelse
%%IncludeFont: Times-Bold
%%IncludeFont: Times-Roman
%%IncludeFont: Times-Italic
%%IncludeFont: Courier
%%IncludeFont: Symbol
% Font instances used by the page descriptions.  Names have the form
% f<family>_<size>; each f<family>_1 is the re-encoded base font and the
% sized variants are made from it with scf.

% Family 0: Times-Bold with MacEncoding.
/f0_1/Times-Bold MacReEncode
/f0_16 f0_1 16 scf
/f0_12 f0_1 12 scf
/f0_11 f0_1 11 scf
/f0_10 f0_1 10 scf
% Family 1: Times-Roman with MacEncoding.
/f1_1/Times-Roman MacReEncode
/f1_10 f1_1 10 scf
/f1_9 f1_1 9 scf
% Family 2: Times-Italic with MacEncoding.
/f2_1/Times-Italic MacReEncode
/f2_10 f2_1 10 scf
% Family 3: Courier with MacEncoding.
/f3_1/Courier MacReEncode
/f3_10 f3_1 10 scf
% Family 4: Symbol keeps its own encoding, except that code 240 is set to
% the glyph name apple.
/f4_1/Symbol BeginSpecialReEncode
240/apple pd
EndSpecialReEncode /f4_10 f4_1 10 scf
% Initial current font: Courier through a matrix that scales by 10 and
% negates the y axis.
/Courier findfont [10 0 0 -10 0 0] makefont setfont
%%EndSetup
%%Page: 1 1
%%BeginPageSetup
% Take the per-page VM snapshot and apply the page matrix mT.
initializepage
(eneko agirre; page: 1 of 6)setjob
%%EndPageSetup
% Save the graphics state and clip to the rectangle at (0, 0) that is
% 538 wide and 781 high.  No matching gR follows on this page; the restore
% performed by endp ends the page.
gS 0 0 538 781 rC
% Each text run below is "x y m" (moveto), an optional "font sf" (setfont)
% and a show shorthand: S = show, A = ashow with the leading number as
% per-character spacing, J = awidthshow with the two leading numbers as
% per-space and per-character spacing.
% Paper title, Times-Bold 16.
28 73 m
f0_16 sf
-.058(EDBL: a Multi-Purposed Lexical Support for the Treatment of Basque)A
% Author list, Times-Bold 12.  The octal escape \222 selects iacute in
% MacEncoding.
42 108 m
f0_12 sf
2.606 .261(Aduriz I., Aldezabal I., Ansa O., Artola X., D\222az de Ilarraza A., Insausti J. M.)J
% Affiliation lines, Times-Roman 10.
246 119 m
f1_10 sf
-.274(IXA Taldea)A
170 130 m
-.026(Department of Computer Languages and Systems)A
202 141 m
-.031(University of the Basque Country)A
176 152 m
-.015(20080 Donostia - The Basque Country \(Spain\))A
% Contact address, Courier 10.
209 163 m
f3_10 sf
([jibxuxen@si.ehu.es])S
122 223 m
f0_10 sf
.543(Abstract)A
26 236 m
f1_9 sf
1.092 .109(EDBL \(Euskararen Datu-Base Lexikala\) is a lexical database)J
26 246 m
2.472 .247(\(LDB\) for Basque. Seen as a large repository of lexical)J
26 256 m
3.247 .325(information, it acts as the basis for different tasks in)J
26 266 m
1.095 .11(automatic processing: it must be both source and support for)J
26 276 m
3.746 .375(the lexicons needed in different applications. Besides)J
26 286 m
1.944 .194(providing for multiple applications, the lexical database is)J
26 296 m
1.677 .168(intended to be neutral in relation to the different linguistic)J
26 306 m
1.019 .102(formalisms, flexible and open enough to accept new types of)J
26 316 m
.887 .089(information, and easy to use.)J
26 326 m
1.185 .119(The applications of the database are presently the following:)J
26 336 m
3.672 .367(morphological analysis, spell checking and correction,)J
26 346 m
4.513 .451(\(semi\)automatic lemmatisation and tagging; syntactic)J
26 356 m
.999 .1(analysis and analysis of textual corpora will be tasks we will)J
26 366 m
2.17 .217(inplement in the short term. These applications constitute)J
26 376 m
1.064 .106(the necessary foundations of present and further work on the)J
26 386 m
.944 .094(processing of Basque.)J
26 396 m
2.375 .237(This paper presents the conceptual schema and the main)J
26 406 m
.995 .099(features of this database, conceived as a general lexical basis)J
26 416 m
2.852 .285(for the automatic treatment of Basque. Extended Entity)J
26 426 m
1.236 .124(Relationship diagrams and TEI-conformant feature-structures)J
26 436 m
.802 .08(coded in SGML are used to explain the conceptual schema of)J
26 446 m
1.567 .157(the LDB. In the short term, these feature-structures will be)J
26 456 m
1.852 .185(used as a standard input and delivery format for the tools)J
26 466 m
1.012 .101(described above.)J
108 489 m
f0_12 sf
.425(Introduction)A
26 503 m
f1_10 sf
1.995 .2(In this article we introduce the Lexical Database for)J
26 514 m
-.006(Basque \(EDBL\), which is currently being used as a lexical)A
26 525 m
1.59 .159(support for the automatic treatment of Basque. EDBL)J
26 536 m
.274 .027(\(Agirre )J
f2_10 sf
.193 .019(et al.)J
f1_10 sf
.24 .024(, 1995\), a large source of lexical information)J
26 547 m
.24 .024(\(with about 70.000 entries\), has been designed as a )J
f2_10 sf
.079(multi-)A
26 558 m
.218(purposed)A
f1_10 sf
.741 .074( lexical support for different processes: a goal-)J
26 569 m
.057 .006(independent lexical source for the treatment of the Basque)J
26 580 m
1.338 .134(language. The lexicons obtained from the database are)J
26 591 m
2.653 .265(afterwards used in tools such as the morphological)J
26 602 m
.533 .053(analyser \(Urkia, 1997\), the speller/checker \(Agirre )J
f2_10 sf
.364 .036(et al.)J
f1_10 sf
(,)S
26 613 m
1.629 .163(1992\) as well as the tagger/lemmatiser \(Aduriz )J
f2_10 sf
1.252 .125(et al.)J
f1_10 sf
(,)S
26 624 m
1.627 .163(1996\). In the short term, we will use TEI-conformant)J
26 635 m
.222 .022(feature-structures coded in SGML as a standard input and)J
26 646 m
-.076(delivery format for the tools described above.)A
26 657 m
2.306 .231(Apart from the features already described, EDBL is)J
26 668 m
-.055(characterised by the following ones:)A
26 679 m
f4_10 sf
(\267)S
44 679 m
f2_10 sf
.216(Neutral)A
f1_10 sf
.748 .075(: the linguistic descriptions held in it should)J
44 690 m
.1 .01(not constrain any applications in the future. This does)J
44 701 m
.593 .059(not mean, obviously, that no formalism will be used)J
44 712 m
.306 .031(in these linguistic descriptions, but that the LDB will)J
44 723 m
.883 .088(remain open to new descriptions, compatible or not)J
44 734 m
1.242 .124(with previous ones. We have used the well-known)J
296 212 m
1.616 .162("Two-level morphology" \(Koskenniemi, 1983\) for)J
296 223 m
-.057(the morphological description of Basque.)A
278 234 m
f4_10 sf
(\267)S
296 234 m
f2_10 sf
.929 .093(Open and flexible)J
f1_10 sf
.709 .071(: EDBL is capable of being easily)J
296 245 m
.363 .036(adapted when necessary, as well as of accepting new)J
296 256 m
-.105(kind of data.)A
278 267 m
f4_10 sf
(\267)S
296 267 m
f2_10 sf
.092(User-friendly)A
f1_10 sf
.397 .04(: This database was originally designed)J
296 278 m
1.177 .118(and adapted to be used by applications and human)J
296 289 m
.302 .03(\(specialised or not\) users. Applying )J
f2_10 sf
.076(user-friendliness)A
296 300 m
f1_10 sf
1.038 .104(to its interface, the database was conceived to help)J
296 311 m
.041 .004(both normal and specialised users, giving them strong)J
296 322 m
3.862 .386(support when the tasks can be automatically)J
296 333 m
-.228(managed.)A
278 344 m
2.556 .256(First of all, we shall introduce the structure of the)J
278 355 m
.783 .078(database, describing also the reasons for which we have)J
278 366 m
.184 .018(adopted this structure. Later on, we will describe the TEI-)J
278 377 m
.874 .087(conformant SGML Feature Structures representation we)J
278 388 m
-.134(have developed for data in EDBL.)A
293 411 m
f0_12 sf
2.441 .244(General Description of the Database)J
369 422 m
.337(Structure)A
278 436 m
f1_10 sf
.067 .007(In order to describe the structure of this database, we have)J
278 447 m
.488 .049(used the Extended Entity Relationship \(EER\) data model)J
278 458 m
2.102 .21(based on the Entity Relationship \(ER\) model, as we)J
278 469 m
3.222 .322(consider it suitable for describing the hierarchical)J
278 480 m
-.006(relationships amongst the different objects in EDBL.)A
278 503 m
f0_11 sf
-.124(Main Entities in EDBL)A
278 517 m
f1_10 sf
1.123 .112(The main data entity in EDBL is the one called EDBL)J
278 528 m
.181 .018(Units, the key of which is composed by a headword and a)J
278 539 m
.789 .079(homograph identifier, as in any conventional dictionary.)J
278 550 m
1.575 .157(This homograph identifier lets us distinguish different)J
278 561 m
-.039(senses for a dictionary entry. This data class can be viewed)A
278 572 m
.155 .015(from three different standpoints, as it holds three different)J
278 583 m
.409 .041(specialisation relationships depending on several features)J
278 594 m
1.136 .114(we describe below. Although all the specialisations are)J
278 605 m
.913 .091(total \(all entries belong to the three specialisation units,)J
278 616 m
.97 .097(thick line in the diagram\), we want to remark that each)J
278 627 m
.014 .001(one is separately followed by a disjointed specialisation \(d)J
278 638 m
.084 .008(within a circle\). Let us have a glance at them.)J
278 661 m
f0_11 sf
.411 .041(Standard and Non-Standard Forms.)J
278 675 m
f1_10 sf
.111 .011(This partial specialisation divides the considered elements)J
278 686 m
.36 .036(into standard and non-standard entries \(figure 1\). For any)J
278 697 m
1.322 .132(lexical entry in the database, being a standard element)J
278 708 m
.38 .038(implies it is correctly spelled and hence, it is accepted by)J
278 719 m
.346 .035(The Basque Language Academy as a lexical entry for the)J
278 730 m
.363 .036(Standard Basque. As the normalisation of the language is)J
278 741 m
2.91 .291(still in process, the number of non-standard forms)J
278 752 m
.622 .062(currently used is quite large. The forms belonging to the)J
endp
%%Page: 2 2
%%BeginPageSetup
initializepage
(eneko agirre; page: 2 of 6)setjob
%%EndPageSetup
gS 0 0 538 781 rC
26 49 m
f1_10 sf
-.001(Non-Standard class can be either dialectal forms \(both at a)A
26 60 m
.439 .044(lexical and a morphemic level\) or badly used words. The)J
26 71 m
.864 .086(relationship placed between Standard and Non-Standard)J
26 82 m
1.307 .131(elements allows us to relate those correct forms to the)J
26 93 m
1.165 .117(ones considered to be incorrect. The existence of more)J
26 104 m
.108 .011(than one dialectal form for any standard entry implies that)J
26 115 m
.133 .013(the cardinality of this relationship is 0:n, i.e. we can relate)J
26 126 m
-.055(zero or more Non-Standard entries to a Standard one in our)A
26 137 m
-.24(database.)A
26 159 m
f0_11 sf
-.01(Dictionary Entries and Other Entries)A
26 173 m
f1_10 sf
1.143 .114(The main specialisation in this database is the one that)J
26 184 m
-.02(separates Dictionary Entries from the Other Entries \(figure)A
26 195 m
-.033(2\). In the class related to dictionary entries, we include any)A
26 206 m
.013 .001(lexical entry that could be found in an ordinary dictionary,)J
26 217 m
3.076 .308(those with a main category such as nouns, verbs,)J
26 228 m
2.256 .226(adjectives, etc. as well as abbreviations and initials,)J
278 49 m
.344 .034(compounds and derivatives \(figure 3\). On the other hand,)J
278 60 m
2.408 .241(the class named Other Entries is specialised in two)J
278 71 m
1.351 .135(subclasses, Non-Independent Morphemes and Inflected)J
278 82 m
2.311 .231(Forms. While non-independent morphemes require a)J
278 93 m
2.621 .262(lemma for their use inside a word, inflected forms)J
278 104 m
1.902 .19(implicitly take case, number, etc. and hence need no)J
278 115 m
.843 .084(morphemes to add such information. We have created a)J
278 126 m
3.433 .343(relationship between all inflected forms and their)J
278 137 m
2.417 .242(corresponding lemma entries. The cardinality of the)J
278 148 m
.501 .05(relationship is as follows: while every inflected form has)J
278 159 m
.728 .073(only one dictionary entry lemma \(cardinality 1:1\), every)J
278 170 m
.353 .035(lemma \(a dictionary entry\) may be related to zero, one or)J
278 181 m
1.403 .14(more inflected forms. As we can see in figure 2, both)J
278 192 m
1.259 .126(subclasses \(Non-Independent Morphemes and Inflected)J
278 203 m
.094 .009(Forms\) are the total disjunctive specialisation of the Other)J
278 214 m
.107 .011(Entries class.)J
151 421 m
-.059(Figure 1: Standard and Non-Standard Entities\253 Relationship)A
176 718 m
-.011(Figure 2: Dictionary Entries and Other Entries)A
26 735 m
-.104(The entries we have considered as inflected forms are either)A
26 746 m
1.16 .116(the ones that would need a very complex morphotactic)J
278 735 m
1.695 .17(treatment \(e.g. inflected verb forms\), or the ones that)J
278 746 m
2.031 .203(cannot be morphologically analysed and decomposed)J
endp
%%Page: 3 3
%%BeginPageSetup
initializepage
(eneko agirre; page: 3 of 6)setjob
%%EndPageSetup
gS 0 0 538 781 rC
26 49 m
f1_10 sf
2.518 .252(\(other inflected forms\), as the whole morphological)J
26 60 m
-.003(information is conveyed by themselves.)A
26 82 m
f0_11 sf
-.006(Multiword and Simple-Word Entries.)A
26 96 m
f1_10 sf
.995 .1(In order to finish this brief description, we will explain)J
26 107 m
1.104 .11(the specialisation of the main class as divided into two)J
26 118 m
1.112 .111(partial subclasses, one containing Simple Lexical Units)J
26 129 m
1.801 .18(\(SLU\) and another one containing Multiword Lexical)J
26 140 m
.149 .015(Units \(MWLU\); look at figure 4. We define a SLU as any)J
278 49 m
.713 .071(string of characters between two blanks, that is, a single)J
278 60 m
.193 .019(typographic unit \(hyphened forms included\). On the other)J
278 71 m
.709 .071(hand, we consider MWLU all lexical units expressed by)J
278 82 m
.601 .06(two or more items placed between blanks. The way they)J
278 93 m
-.017(are stored depends upon whether they are SLU or MWLU,)A
278 104 m
2.728 .273(as the classification of the MWLU entries is quite)J
278 115 m
.176 .018(complex. Let us have a look at each one of them in turn.)J
212 367 m
.095 .009(Figure 3: Dictionary Entries)J
33 390 m
.04 .004(Figure 4: SLU and MWLU Entries)J
26 419 m
f0_10 sf
15.559 1.556(Simple Word Entries')J
26 430 m
11.74 1.174(morphological components)J
26 441 m
f1_10 sf
6.161 .616(\(figure 5\). As Basque is an)J
26 452 m
1.374 .137(agglutinative language, it presents a)J
26 463 m
3.034 .303(relatively high power to generate)J
26 474 m
.469 .047(inflected word-forms; any word entry)J
26 485 m
3.992 .399(independently takes each of the)J
26 496 m
4.893 .489(necessary elements \(the affixes)J
26 507 m
3.852 .385(corresponding to the determiner,)J
26 518 m
1.302 .13(number and declension features\) for)J
26 529 m
.453 .045(the different \(syntactic case included\))J
26 540 m
1.814 .181(functions. Moreover, noun ellipsis can occur inside a)J
26 551 m
2.872 .287(complex noun due to recursive constructions, even)J
26 562 m
.692 .069(though, in practice, it is not usual to find more than two)J
26 573 m
-.04(levels of recursion in a word-form.)A
26 584 m
.661 .066(Therefore, it is necessary to apply a mechanism that lets)J
26 595 m
.264 .026(us know the morphotactic formation of the morphemes in)J
26 606 m
2.228 .223(a simple word \320that is, the way the morphemes are)J
26 617 m
.097 .01(linked\320, and their necessary morphosyntactic information.)J
26 628 m
.68 .068("The Two-level Morphology" proposed by Koskenniemi)J
26 639 m
.369 .037(\(Koskenniemi, 1983\) was chosen as the most adequate to)J
26 650 m
-.011(represent lexical information in Basque, due to the reasons)A
363 407 m
.242 .024(we mention next: it distinguishes the)J
363 418 m
7.249 .725(algorithm and the language)J
363 429 m
2.317 .232(knowledge; it is a general model)J
363 440 m
3.466 .347(\(applicable to any language\); it)J
363 451 m
2.96 .296(works either as a morphological)J
363 462 m
.66 .066(analyser or generator; and finally, it)J
363 473 m
-.033(distinguishes both the surface and the)A
363 484 m
-.006(lexical level.)A
363 495 m
8.363 .836(We use the Morphotactics)J
363 506 m
1.293 .129(relationship in order to explain the)J
363 517 m
-.066(different morphological aspects SLU-)A
363 528 m
1.654 .165(s adopt. The different entities that)J
278 539 m
1.28 .128(take part in this relationship are the following: Single-)J
278 550 m
.037 .004(word entries, Two-level forms of the entries, Continuation)J
278 561 m
.277 .028(Classes and Lexicons. Every entry is at least related to an)J
278 572 m
.572 .057(only Two-level form, but it may be related to even more)J
278 583 m
.259 .026(entries \(Two-level forms usually have diacritics by which)J
278 594 m
3.581 .358(we the proper morpho-phonological rules will be)J
278 605 m
-.029(selected\). Every Two-level entry is related to a Lexicon, as)A
278 616 m
2.532 .253(well as being attached to a Continuation Class. By)J
278 627 m
1.793 .179(relating Two-level forms to Continuation Classes, we)J
278 638 m
1.091 .109(define the set of morphemes we can add or agglutinate)J
278 649 m
.036 .004(after the stem of an entry.)J
endp
%%Page: 4 4
%%BeginPageSetup
initializepage
(eneko agirre; page: 4 of 6)setjob
%%EndPageSetup
gS 0 0 538 781 rC
177 310 m
f1_10 sf
.168 .017(Figure 5: SLU\253s Morphotactical Relationship)J
26 327 m
.137 .014(Our system consists of a lexicon made up of about 70,000)J
26 338 m
2.035 .204(items, grouped into 120 sublexicons and a set of 24)J
26 349 m
2.269 .227(morpho-phonological rules that describe the changes)J
26 360 m
.909 .091(occurring between the lexical and the surface level. We)J
26 371 m
1.351 .135(have only applied this method to SLU-s in Basque, as)J
26 382 m
1.02 .102(MWLU-s behave differently: we explain this behaviour)J
26 393 m
3.18 .318(upon the use of some MWLU realisation patterns)J
26 404 m
-.183(described below.)A
26 427 m
f0_10 sf
4.689 .469(Multiword Entries' morphological components)J
26 438 m
f1_10 sf
.548 .055(\(figure 6\) It\325s not always easy to decide whether an item)J
26 449 m
.873 .087(must be defined as a MWLU; nonetheless, it is obvious)J
26 460 m
.014 .001(they show a very complex variety when constructed. First,)J
26 471 m
.005 .001(MWLUs' components can appear contiguous or dispersed;)J
278 327 m
1.172 .117(in other words, they do not necessarily occur one after)J
278 338 m
.538 .054(another; second, the order is not fixed, as some MWLUs)J
278 349 m
1.122 .112(must be formed in a restricted word order while others)J
278 360 m
.295 .03(must not: an item may appear in different locations in the)J
278 371 m
.37 .037(MWLU; third, the components may either be inflected or)J
278 382 m
.054 .005(appear as an invariable form. In the case their components)J
278 393 m
1.469 .147(are inflected, some of them may accept any inflection)J
278 404 m
.864 .086(whilst others must only take a restricted set of inflected)J
278 415 m
1.117 .112(attributes. Thus, we must define the restrictions for the)J
278 426 m
.468 .047(components to accept a few inflected forms. And finally,)J
278 437 m
1.794 .179(some MWLUs are sure and some are ambiguous: we)J
278 448 m
.373 .037(cannot accurately assure the same group of words to be a)J
278 459 m
.092 .009(multiword entry in any context.)J
143 731 m
.034 .003(Figure 6: MWLU\253s composition and morphological realisation)J
endp
%%Page: 5 5
%%BeginPageSetup
initializepage
(eneko agirre; page: 5 of 6)setjob
%%EndPageSetup
gS 0 0 538 781 rC
26 49 m
f1_10 sf
1.656 .166(Following these features, we use a formal description)J
26 60 m
.263 .026(where different MWLU realisation patterns are applied to)J
26 71 m
.14 .014(each MWLU entry. As well as declaring the certainty of a)J
26 82 m
.038 .004(MWLU, the realisation patterns restrict the possible order,)J
26 93 m
1.938 .194(contiguity and inflection rules \(for each order\) every)J
26 104 m
.113 .011(element may adopt in the whole lexical unit.)J
38 127 m
f0_12 sf
2.322 .232(EDBL entities represented as Feature)J
111 138 m
.603(Structures.)A
26 152 m
f1_10 sf
.119 .012(As it has been pointed out before, TEI-conformant SGML)J
26 163 m
.636 .064(is used as delivery format for EDBL entities. In order to)J
26 174 m
.847 .085(represent the structure of each one of the entities, typed)J
26 185 m
-.02(Feature Structures \(FS\) will be used. Feature structures are)A
26 196 m
.481 .048(very adequate to encode linguistic information, there is a)J
26 207 m
1.533 .153(well-developed theoretical framework for them, and it)J
26 218 m
1.06 .106(seems that their applicability to encode the information)J
26 229 m
-.006(found in dictionaries, or in lexical databases for NLP, as is)A
26 240 m
-.008(our case, is quite natural \(Ide )A
f2_10 sf
(et al.)S
f1_10 sf
-.01(, 1993\).)A
26 251 m
.438 .044(Instead of defining our own formalism for FS\325s, we have)J
26 262 m
.353 .035(adopted the one defined by TEI-P3, as we found it useful)J
26 273 m
1.599 .16(and neat for our purposes. Following TEI-P3, Feature)J
26 284 m
.099 .01(Structure Declarations \(FSD\325s\) have been made for all the)J
26 295 m
.281 .028(types of FS\325s that are used in EDBL. These FSD\325s reflect)J
26 306 m
.562 .056(the hierarchic class structure of the database, and feature)J
26 317 m
.224 .022(inheritance is used in order to make the definition of each)J
26 328 m
.17 .017(class more consistent and comfortable \(the attribute Base-)J
26 339 m
1.417 .142(Type is used in the definition of a type to declare the)J
26 350 m
1.797 .18(superclass or basic type from which the type defined)J
26 361 m
-.053(inherits features\).)A
26 372 m
1.659 .166(Each one of the classes in the figures above define a)J
26 383 m
.09 .009(different FS type. The main class defines the most general)J
26 394 m
2.723 .272(structure \321)J
f3_10 sf
.722(EDBL-Unit-FS)A
f1_10 sf
2.153 .215(\321, whose features are)J
26 405 m
-.034(inherited by every class and every instance in the database.)A
26 416 m
.351 .035(Let us show the FSD of this main class:)J
26 439 m
f1_9 sf
.462 .046(
)A
278 282 m
.627 .063( )J
278 304 m
.715 .072(