###############################################################################
#
# This is a partial conversion of the rule file from the IceTagger
# part-of-speech tagger for Icelandic by Hrafn Loftsson, along with
# some hacked up rules from Trond Trosterud's Faroese CG and some of
# our very own home-brew crack.
#
###############################################################################
# Window segmentation.
# SOFT-DELIMITERS: fallback split points (commas) for over-long windows.
SOFT-DELIMITERS = "<,>" cm ;
# DELIMITERS: hard sentence boundaries. The plain wordforms mirror the "<$…>"
# variants listed after them ( . : ! ? | ). The source previously carried an
# empty tag "" and a stray ">" where "<!>" and "<?>" belong.
DELIMITERS = "<.>" "<:>" "<!>" "<?>" "<|>" "<$.>" "<$:>" "<$!>" "<$?>" "<$|>" sent;
# Beginning / end of sentence. (>>>) and (<<<) are CG-3's window edges.
# NOTE(review): the second entry was an empty "()" in the source and has been
# restored as the <s> / </s> markers used in related CG grammars; confirm
# against upstream.
LIST BOS = (>>>) (<s>) (sent) sent ;
LIST EOS = (<<<) (</s>) (sent) sent ;
# Clause boundaries: sentence end, subordinators, commas, relativisers, coordinators.
LIST CLB = sent cnjsub cm rel cnjcoo;
# Comma by tag (see also COMMA, which matches the comma lemma).
LIST Coma = (cm);
# ---------------------------------------------------------------------------
# Part-of-speech and morphological tag lists.
# Each LIST names a set of readings by the analyser tags they carry.
# ---------------------------------------------------------------------------
# Nouns, numerals (including the noun lemmas for thousand/million/billion/hundred),
# proper nouns and adjectives.
LIST N = (n) ;
LIST Num = (num) ("þúsund"i n) ("milljón"i n) ("milljarður"i n) ("hundrað"i n);
LIST ProperNom = (np) ;
LIST Adj = (adj) ;
# Weak / strong adjective declension.
LIST Weak = (vei) ;
LIST Strong = (sta) ;
LIST Adv = (adv) ;
# Pronoun subclasses.
LIST PersPron = (prn p1) (prn p2) (prn p3) ;
LIST IndefPron = (prn ind);
LIST DemPron = (prn dem) ;
LIST Refl = (prn ref) ;
LIST InterrPron = (prn itg) ;
LIST PossPron = (prn pos) ;
LIST Pron = (prn);
LIST Prep = (pr) ;
# Non-finite verb forms.
LIST Participle = (pp) (supn);
LIST PP = (actv pp) ;
LIST Inf = (inf) ;
LIST VerbInf = (vblex inf) ;
# Auxiliaries.
LIST VerbSpecAux = (vaux) ;
LIST VerbAux = (vaux) (vbhaver) (vbser) ("verða" vblex) ("geta" vblex); ## check verða = aux ?
LIST VerbSupine = (vblex supn) ;
LIST VerbBe = (vbser) ;
LIST Verb = vblex vbser vbhaver vaux;
# Finite tense/mood tags; TempsIrreal is the (prs)/(pss) subset.
LIST TempsFin = (pri) (past) (prs) (pss);
LIST TempsIrreal = (prs) (pss);
# Relativisers and conjunctions.
LIST Rel = (rel) ;
LIST Conj = cnjcoo cnjadv cnjsub ;
LIST CC = (cnjcoo) ;
LIST CS = (cnjsub) ;
LIST Exclamation = (ij) ;
LIST Article = (def) ;
LIST Det = (det) ;
# NOTE(review): Interj is defined on the (itg) tag, the same tag InterrPron
# uses, while (ij) is listed under Exclamation above; confirm this is intended.
LIST Interj = (itg) ;
# Definiteness, gender, number, case.
LIST Def = (def) ;
LIST Indef = (ind) ;
LIST Fem = (f);
LIST Msc = (m);
LIST Neu = (nt);
LIST Sg = (sg);
LIST Pl = (pl);
LIST Nom = (nom);
LIST Gen = (gen);
LIST Acc = (acc);
LIST Dat = (dat);
# (A second, identical "LIST Inf = (inf);" used to follow here; it duplicated
# the definition above and has been removed.)
# ---------------------------------------------------------------------------
# Prepositions grouped by the case they govern. Entries are case-insensitive
# regular expressions ("…"ri) matched against the lemma.
# ---------------------------------------------------------------------------
# Genitive-governing prepositions.
# "í stað" was mis-encoded as "í staö" (ð -> ö) and therefore could never match.
LIST GenPrep = "án"ri "auk"ri "austan"ri "innan"ri "í stað"ri "meðal"ri "megin"ri "milli"ri "millum"ri
"neðan"ri "norðan"ri "ofan"ri "sakir"ri "sunnan"ri "sökum"ri "til"ri "utan"ri "vegna"ri "vestan"ri ;
# Accusative-governing prepositions.
LIST AccPrep = "fyrir ofan"ri "gegnum"ri "kringum"ri "um"ri "umfram"ri "umhverfis"ri ;
# Dative-governing prepositions.
LIST DatPrep = "að"ri "af"ri "andspænis"ri "ásamt"ri "frá"ri "gagn"ri "gagnvart"ri "gegnt"ri "handa"ri
"hjá"ri "meðfram"ri "mót"ri "móti"ri "nálægt"ri "undan"ri "úr"ri ;
# Prepositions that take either accusative or dative.
LIST AccDatPrep = "á"ri "eftir"ri "fyrir"ri "í"ri "með"ri "undir"ri "við"ri "yfir"ri ;
# ---------------------------------------------------------------------------
# Lexical (lemma-based) lists.
# ---------------------------------------------------------------------------
# Modal-like verbs that take an infinitive.
# "skulu" is the Icelandic lemma and is the form the r54 rule refers to; the
# Faroese "skula" is kept for backward compatibility until the list is fully
# translated.
LIST MODV = "ætla" "kunna" "láta" "skula" "skulu" "vilja" "munu" "mega" "vera" ; #todo: translate fao to icelandic
# Verbs that take an accusative ("quirky") subject.
LIST ACCSUBJVERB = "bresta" "dreyma" "fýsir" "greinir" "grípa" "gruna" "hrylla" "hungra" "kitla" "klígja" "klæja" "kreppa" "langa" "lengja" "lengja# eftir" "lysta" "lægja" "misminnir" "minnir" "muna" "órar" "raga" "ráma" "reka" "saka" "setur" "skefur" "skipta" "skorta" "stoða" "sundla" "svengja" "svima" "svíða" "syfja" "undra" "vanhagar" "vanta" "varðar" "verkjar" "þrjóta" "þverra" ;
# Verbs that take a dative subject.
# Fixed: "dettta" -> "detta"; dropped the repeated "henta" and "vinnast" entries.
# NOTE(review): "mér" is a pronoun form, not a verb lemma; left in place, confirm.
LIST DATSUBJVERB = "auðnast" "áskotnast" "batna" "ber" "berast" "birtast" "bjóða" "blandast" "blæða" "blæða# út"
"blöskra" "bragðast" "bregða" "brenna" "búa" "bætast" "daprast" "dáma" "detta" "drjúpa"
"duga" "dveljast" "dyljast" "elna" "endast" "fara" "feila" "festast" "fjölga" "fljúga"
"fylgja" "fyrirgefa" "fæðast" "fækka" "förla" "gagnast" "ganga" "geðjast" "gefast" "gremjast"
"greypast" "græðast" "haldast" "hefnast" "heilsast" "henta" "heppnast" "heyrast" "hlaupa" "hlekkjast"
"hlotnast" "hlýnar" "hnignar" "hrakar" "hrjósa" "hugkvæmast" "hugnast" "hæfa" "hægja" "hættir"
"kemur" "kólna" "kyngir" "lánast" "lást" "leiðast" "leyfast" "ljúka" "ljósta" "lærast"
"misfarast" "misheppnast" "misheyrast" "nægja" "ofbjóða" "óa" "ratast" "reiknast" "réna" "rigna"
"rísa" "sárna" "seinka" "sinnast" "sjatna" "sjást" "skána" "skeika" "skiljast" "skjátlast"
"skola" "skrika" "snúast" "sortna" "stafa" "standa" "stígur" "stekkur" "súrna" "svelgjast"
"svipa" "svíða" "sýnast" "sækjast" "sæma" "takast" "teljast" "mér" "vefjast" "vegna"
"veita" "versna" "viðkoma" "viðvíkja" "vinnast" "vitnast" "volgna" "vökna" "yfirsjást" "ylna"
"þoka" "þóknast" "líða" ("finna" midv) ("skilja" midv) "líka" "mislíka"; ## TODO: CHECK 'líða' and 'finnst'
# Small semantic verb/adverb classes.
LIST WANTINGVERB = "vilja";
LIST REPORTINGVERB = "segja";
LIST TIMEADVERB = "á morgun" "í gær" "í dag";
LIST SADV = "aldrei" "ekki" "alltaf" ; # complete
# Tag inventories used for $$-unification (case, gender, number, person).
LIST NAGD = nom acc gen dat;
LIST GENDER = m f nt;
LIST NUMBER = sg pl;
LIST PERSON = p1 p2 p3;
# Comma matched by lemma.
LIST COMMA = (",") ;
# ---------------------------------------------------------------------------
# Derived sets, built from the LISTs above with set operators
# (+ = both, | / OR = either, - = except).
# ---------------------------------------------------------------------------
# Finite verb: a verb reading that also carries a finite tense/mood tag.
SET VerbFin = Verb + TempsFin;
# Nouns restricted to two of the three genders (used by the agreement rules).
SET NomMscFem = (n m) OR (n f) ;
SET NomMscNeu = (n m) OR (n nt) ;
SET NomFemNeu = (n f) OR (n nt) ;
# Case complements.
SET NOTDAT = Nom | Gen | Acc ;
SET NOTACC = Nom | Gen | Dat ;
SET NOTACCDAT = Nom | Gen ;
# Oblique cases.
SET OBL = Acc | Dat | Gen ;
# What may follow a determiner.
SET POST-DET = Adj | Det | N | Num | Adv ;
# What may precede a noun inside a noun phrase.
# NOTE(review): (N Gen), (Pron Gen) and (Pron Dem) use the same parenthesised
# form as (n m) above, so they appear to be read as the literal tags "N"/"Gen"
# etc. rather than as the sets N + Gen, Pron + Gen, DemPron. Confirm intent.
SET PRE-N = Adj | Det | (N Gen) | Num | (Pron Gen) | (Pron Dem) | CC ; # Det???
# Possible heads of a noun phrase.
SET NP-HEAD = N | Pron ;
# Any "word" reading, and punctuation marks.
SET WORD = N | Verb | Adj | Prep | Pron | Det | Adv | CC | CS | Interj | Num | ("\?") ;
SET MARK = COMMA | ("\\") | ("\;") ; #"
SET WORDMARK = WORD | MARK ;
# Everything that is neither a pre-nominal modifier nor an adverb; used as the
# BARRIER in the NP agreement rules.
SET NPNHA = WORDMARK - PRE-N - Adv ;
# First rule section: morphological disambiguation.
SECTION
#################################################################################
# Specific lexemes
#################################################################################
# Wordform "á": drop the noun reading when the previous cohort is unambiguously
# a noun or proper noun.
REMOVE N IF (-1C N OR ProperNom) (0 ("<á>"));
# Lemma "hafa": drop the noun reading when a participle or supine follows.
REMOVE N IF (1 PP | VerbSupine) (0 ("hafa"));
# Example:
#Finnland er í Evrópusambandinu og er eina Norðurlandaþjóðin sem hefur tekið upp evruna sem gjaldmiðil.
# "sem": drop the relativiser reading when no finite verb follows before the
# next clause boundary.
REMOVE Rel IF (0 ("sem")) (NOT 1* VerbFin BARRIER CLB);
# "er": drop the conjunction reading directly after another conjunction.
REMOVE Conj IF (0 ("er")) (-1 Conj);
# Example:
#Ekki er búið að boða hvenær það hefst.
# "er": drop the relativiser reading unless a comma immediately precedes.
REMOVE Rel IF (0 ("er")) (NOT -1 COMMA) ;
# Example:
#Töldu margir þingmenn að skýrslan gæti verið gott innlegg í umræðuna og gæti aukið jöfnuð á ný meðal Kínverja.
# Form ambiguous between "gæta" and "geta": drop "gæta" when an adjective,
# participle or supine follows.
REMOVE ("gæta") IF (0 ("gæta")) (0 ("geta")) (1 Adj | PP | VerbSupine);
#################################################################################
# Proper nouns
#################################################################################
# Example: hús Láru ("Lára's house")
# Choose the proper-noun reading after an unambiguous noun / case-governing
# preposition when the following cohort has the matching case.
# NOTE(review): the case test is on position 1 (the next cohort), not on the
# target itself; confirm this is intended.
SELECT ProperNom IF (-1C N) (1 Gen) ; # !tag.isCase(GENITIVE) ;
SELECT ProperNom IF (-1C GenPrep) (1 Gen) ;
SELECT ProperNom IF (-1C AccPrep) (1 Acc) ;
SELECT ProperNom IF (-1C DatPrep) (1 Dat) ;
# Example: Hildar Guðmundsdóttur
# Genitive patronymic in "-dóttur" after an unambiguous genitive proper noun.
SELECT ProperNom IF (-1C ProperNom) (-1C Gen) (0 ("<.*dóttur>"r)) (0 Gen) ; # !tag.isCase(GENITIVE) ;
# Proper noun following an unambiguous feminine / masculine proper noun.
SELECT ProperNom IF (-1C ProperNom) (-1C Fem) ; # !tag.isGender(FEMININE) ;
SELECT ProperNom IF (-1C ProperNom) (-1C Msc) ; # !tag.isGender(MASCULINE) ;
# Drop the accusative proper-noun reading when an unambiguous accusative
# occurs later in the same clause.
REMOVE ProperNom + Acc IF (1C* Acc BARRIER CLB);
#################################################################################
# Noms
#################################################################################
# hann sýnir but stakk hún hendi is ok, það sýnir aldrei
# CEx. Þessi íslenska kona
#SELECT Nom IF ((-1C PersPron) OR (-1C PersPron LINK -1C DemPron) OR (-1C (""ri)));
# CEx. um nýja samkomulagið
#SELECT N IF (-1C GenPrep) (1 Gen) ;
#SELECT N IF (-1C AccPrep) (1 Acc) ;
#SELECT N IF (-1C DatPrep) (1 Dat) ;
# að (cn/aa) beina (sgn/n*) mér
# Íslendingar munu boða að
# Noun/infinitive ambiguity after an infinitive-tagged cohort: drop the noun
# reading unless the previous cohort is one of the MODV verbs.
REMOVE N IF (-1 Inf) (0 VerbInf) (NOT -1 MODV) ;
# einhvern sem sæti
# sem steðjar að heiminum
# Noun/verb ambiguity: drop the noun unless "að" follows.
# NOTE(review): the -1 test is an empty regex tag (""ri); judging by the
# examples a wordform such as "sem" may have been lost here. Confirm upstream.
REMOVE N IF (-1 (""ri)) (0 Verb) (NOT 1 ("að"ri));
# choose the verb
# CEx. Ísland á dagskrá eftir viku
# CEx. Fundi Íslendinga með Bretum og Hollendingum um Icesave málið lauk í dag án niðurstöðu.
#REMOVE N IF (-1C ProperNom) (0 Verb) ;
# Disallow two nouns in a row unless the latter is in the genitive case
# NOTE(review): the case test is (NOT -1 Gen), i.e. on the preceding noun, not
# on the target; confirm against the comment above.
REMOVE N IF (-1C N) (NOT -1 Gen) ((NOT -2 ProperNom) OR (-2 BOS));
# select the possessive pronoun, "pabbi sinn"
SELECT PossPron IF (-1C N);
# Lemma "á" directly after an adverb: keep the "á" reading.
SELECT ("á"ri) IF (-1 Adv);
# skal (ekki) hita
# Infinitive directly after an auxiliary, or after auxiliary + adverb.
SELECT VerbInf IF ((-1 VerbSpecAux) OR (-1C Adv LINK NOT -2 BOS LINK -2 VerbSpecAux));
# hafði talið, velja sagnbót   (i.e. "choose the supine")
# þetta kerfi get ekki þýtt neitt.
# Supine when an auxiliary is one or two cohorts to the left.
SELECT VerbSupine IF ((-1 VerbAux) OR (-2 VerbAux));
# select the adjective, but ok is "fyrir framan X", and ok is "bara verð markaðarins"
# NOTE(review): the rule selects Adv, although the comment above says adjective.
SELECT Adv IF (0 Adj) (-1 BOS OR (NOT -1 Prep)) (1 EOS OR (NOT 1 N LINK 1 Gen));
# sú líkn
# CEx. að í því hefðu fyrirvararnir
#SELECT DemPron IF (-1 N) (NOT 0 N);
#SELECT VerbBe IF (NOT 0 N);
# select the adjective, "hann er lasinn",
# Pattern: personal pronoun + "to be" + target.
SELECT Adj IF (-1 VerbBe) (NOT -2 BOS) (-2 PersPron);
# The next four rules share one idea: if the target could be a finite verb and
# no other finite verb is found on either side, remove the competing reading.
#Auðvitað brá okkur við þetta
REMOVE N IF (NOT -1* VerbFin BARRIER BOS) (NOT 1* VerbFin BARRIER EOS) (0 VerbFin) (0 N);
#Bandaríkin heita aðstoð
REMOVE Adj IF (NOT -1* VerbFin BARRIER BOS) (NOT 1* VerbFin BARRIER EOS) (0 VerbFin) (0 Adj);
#Finnland er í Evrópusambandinu og er eina Norðurlandaþjóðin sem hefur tekið upp evruna sem gjaldmiðil.
REMOVE Conj IF (NOT -1* VerbFin BARRIER BOS) (NOT 1* VerbFin BARRIER EOS) (0 VerbFin) (0 Conj);
# Variant: the rightward scan stops at a conjunction that is followed by a finite verb.
REMOVE Conj IF (NOT -1* VerbFin BARRIER BOS) (NOT 1* VerbFin BARRIER Conj LINK 1 VerbFin) (0 VerbFin) (0 Conj);
#Að því loknu verður málið rætt á þingi.
# Nom/acc-ambiguous noun: drop the accusative when the clause has no other
# nominative noun/pronoun and no accusative-subject verb.
REMOVE N + Acc IF (0 Nom OR Acc) (NOT -1* Pron + Nom OR N + Nom BARRIER CLB) (NOT 1* Pron + Nom OR N + Nom BARRIER CLB) (NOT 1* ACCSUBJVERB BARRIER CLB) (NOT -1* ACCSUBJVERB BARRIER CLB);
#Þór býr í höll sem heitir Bilskirnir   ("Thor lives in a hall called Bilskirnir")
# No adjective between an unambiguous preposition and a conjunction/relativiser.
REMOVE Adj IF (-1C Prep) (1 Conj | Rel);
#Nánast allir Íslendingar nota nú netið með ADSL.
# Nom/acc-only cohort: drop the nominative when an unambiguous nominative NP
# head to the left already agrees in number ($$NUMBER) with a following finite
# verb, and no dative/accusative-subject verb precedes in the clause.
REMOVE Nom IF (0C Nom OR Acc) (-1C* NP-HEAD + Nom + $$NUMBER LINK 1* VerbFin + $$NUMBER BARRIER CLB) (NOT -1* DATSUBJVERB OR ACCSUBJVERB BARRIER CLB);
#################################################################################
# Adverbs
#################################################################################
# Example:
#Hin íslenska Wikipedia fór í gang 5. desember 2003 og inniheldur núna 27.671 greinar.
# "núna": drop the participle reading of the verb "núa" when the cohort also
# has the adverb "núna" reading and no unambiguous auxiliary is in the clause.
REMOVE ("núa") + PP IF (0 ("núna") + Adv) (NOT -1C* VerbAux BARRIER CLB) (NOT 1C* VerbAux BARRIER CLB);
# Example:
#Rouston Benoit, 10 ára, var á gangi með ömmu sinni þegar skjálftinn reið yfir.
# "þegar": drop the adverb reading when finite verbs are found on both sides
# within the sentence.
REMOVE ("þegar") + Adv IF (-1* VerbFin BARRIER BOS) (1* VerbFin BARRIER EOS);
#################################################################################
# Adjectives
#################################################################################
# Hann borðar mjög heita súpu en hún hleypur.
# Drop noun / infinitive / finite-verb readings after a specific preceding word.
# NOTE(review): the tag tested at -1 is an empty string (""); the original
# wordform appears to have been lost (the example suggests an intensifier such
# as "mjög"). Confirm against upstream before relying on these three rules.
REMOVE N IF (-1C (""));
REMOVE VerbInf IF (-1C (""));
REMOVE VerbFin IF (-1C (""));
# norska ríkisstjórnin   ("the Norwegian government")
# Weak adjective / nominative noun ambiguity before an unambiguous definite noun.
REMOVE N IF (0 N) (0 Nom) (0 Adj) (0 (vei)) (1C N) (1C Def);
# íslenskan almenning   ("the Icelandic public", acc.)
# Strong adjective / nominative noun ambiguity before an unambiguous indefinite noun.
REMOVE N IF (0 N) (0 Nom) (0 Adj) (0 (sta)) (1C N) (1C Indef);
# kröfurnar voru skýrar;   ("the demands were clear")
# Adj/adv ambiguity between "to be" and a clause boundary: not an adverb.
REMOVE Adv IF (0 Adv) (0 Adj) (-1C VerbBe) (1C CLB);
# Fallega rauða rósin   ("the beautiful red rose")
# Adj/adv ambiguity before another adjective, with no unambiguous finite verb to the left.
REMOVE Adv IF (NOT -1C* VerbFin BARRIER CLB) (0 Adv) (0 Adj) (1 Adj);
# Ég er að flytja frá Akureyri til Reykjavíkur í eitt ár.
# Adj/numeral ambiguity between a preposition and a singular noun: not an adjective.
REMOVE Adj IF (-1 Prep) (0 Adj) (0 Num) (1 N) (1 Sg);
#Bankarnir stóru.   ("the big banks")
# No strong adjective directly after an unambiguous definite noun.
REMOVE Adj + Strong IF (-1C N + Def);
#################################################################################
# Numerals
#################################################################################
# Nefnifall er eitt af fjórum föllum í íslensku
# "einn" followed by an unambiguous "af": drop the adjective reading.
REMOVE ("einn") + Adj IF (1C ("af"));
#################################################################################
# Advs
#################################################################################
# Choose the adverb between an unambiguous finite verb and a following cohort
# that is unambiguously a verb and has a participle/supine reading.
SELECT Adv IF (-1C VerbFin) (1C Verb) (1 Participle);
#################################################################################
# Personal pronouns
#################################################################################
#ég gerði það sjálfur   ("I did it myself")
# Accusative pronoun right after a finite verb when an unambiguous nominative
# pronoun already precedes in the clause.
SELECT PersPron + Acc IF (-1 VerbFin) (-1C* PersPron + Nom BARRIER VerbFin | CLB);
#Við gerðum það sjálf.   ("We did it ourselves")
# Pick the person reading that agrees ($$PERSON) with the unambiguous finite verb that follows.
SELECT:r53p $$PERSON IF (1C VerbFin + $$PERSON)(0 PersPron) ;
#þá auðveldustu
# Wordform "þá": drop the personal-pronoun reading in these contexts.
REMOVE ("<þá>"ri) + PersPron IF (1 Verb OR Adv OR N); # was + Conj
REMOVE ("<þá>"ri) + PersPron IF (-1 VerbBe OR Adv OR N); # was + Conj
REMOVE ("<þá>"ri) + PersPron IF ((-1C Refl) OR (-1 VerbBe OR PersPron)); # væri þá and réði sig þá
# Only one nominative personal pronoun before the finite verb of a clause.
# (This rule used to appear twice in a row, byte-for-byte identical; the
# redundant copy has been removed.)
REMOVE PersPron + Nom IF (-1C* PersPron + Nom BARRIER VerbFin | CLB);
#sem hann sá í gær.
# After "sem", nom/acc-ambiguous pronoun before a finite verb with no later
# nominative in the clause: drop the accusative.
REMOVE PersPron + Acc IF (-1 ("sem")) (1 VerbFin) (0 PersPron + Nom) (0 PersPron + Acc) (NOT 1* Nom BARRIER CLB | EOS);
#Ég veit ekki hvort það hafa einhverjir nemendur ekki lokið verkefninu
# No nominative interrogative pronoun after an unambiguous nominative personal pronoun in the clause.
REMOVE InterrPron + Nom IF (-1C* PersPron + Nom BARRIER CLB);
#################################################################################
# Indefinite pronouns
#################################################################################
#Hvað ert þú að gera ?   ("What are you doing?")
# Drop a nominative pronoun reading when an unambiguous nominative personal
# pronoun follows in the same clause.
REMOVE Pron + Nom IF (1C* PersPron + Nom BARRIER CLB);
# Indefinite/interrogative ambiguity right after a sentence boundary: drop the
# indefinite reading. The second rule has the same context without the Nom
# restriction, so it also covers every case the first one does.
REMOVE IndefPron + Nom IF (-1 BOS | EOS) (0 IndefPron) (0 InterrPron);
REMOVE IndefPron IF (-1 BOS | EOS) (0 IndefPron) (0 InterrPron) ;
#################################################################################
# Demonstrative pronouns
#################################################################################
# Ég sá ekki
# finnst þér þá
#ég þakka þeim kærlega fyrir
#Fá þeir íbúar, sem ekki geta eða vilja snúa aftur í íbúðirnar yfir nóttina, aðstoð með gistingu ef þörf krefur.
# Demonstrative/personal ambiguity: drop the nominative demonstrative.
REMOVE DemPron + Nom IF (0 DemPron) (0 PersPron); ## this needs fixing
#SELECT DemPron + $$GENDER + $$NUMBER + $$NAGD IF (1C N + $$GENDER + $$NUMBER + $$NAGD);
# No demonstrative directly after an unambiguous personal pronoun.
REMOVE DemPron IF (-1C PersPron) ;
# Wordform "þá": drop the demonstrative reading in the contexts below.
REMOVE ("<þá>"ri) + DemPron IF (1 VerbFin) (NOT 1 Rel); # þá sem ...
REMOVE ("<þá>"ri) + DemPron IF (1 Conj OR Adv) (-1 BOS); # Þá þegar ...
REMOVE ("<þá>"ri) + DemPron IF (1C N OR ProperNom) (-1 Verb); # gekk þá maðurinn
# En mér leið ekki vel eftir þetta.
# Determiner/pronoun ambiguity: not a determiner unless something that can
# follow a determiner (POST-DET) comes next.
REMOVE Det IF (0 Det) (0 Pron) (NOT 1 POST-DET);
# Við gerðum það sjálf.
# No demonstrative between an unambiguous finite verb and an unambiguous reflexive.
REMOVE DemPron IF (-1C VerbFin) (1C Refl);
# í gær sá stelpan.
# Demonstrative/finite-verb ambiguity with no other finite verb in the clause:
# drop the demonstrative.
REMOVE DemPron IF (0 DemPron) (0 VerbFin) (NOT -1* VerbFin BARRIER CLB) (NOT 1* VerbFin BARRIER CLB);
#Þetta er borgin, er hann kom frá.
# "er" after an unambiguous comma, with an unambiguous finite verb later in
# the clause: choose the relativiser.
SELECT Rel IF (-1C Coma) (0 ("er")) (1C* VerbFin BARRIER CLB);
# Eftir þetta fór hann til Kaupmannahafnar
# Nom/acc-only cohort after an unambiguous acc/dat preposition: not nominative.
REMOVE Nom IF (-1C AccDatPrep) (0C Nom OR Acc);
#################################################################################
# Reflexive pronouns
#################################################################################
# sem hann sér í dag.
# Reflexive/finite-verb ambiguity with no other finite verb in the clause:
# drop the reflexive.
REMOVE Refl IF (0 Refl) (0 VerbFin) (NOT -1* VerbFin BARRIER CLB) (NOT 1* VerbFin BARRIER CLB);
# Hann meiddi sig, Hún meiddi sig, Það meiddi sig, Þeir meiddu sig ...
# Choose the reflexive reading whose gender and number unify ($$GENDER,
# $$NUMBER) with a personal pronoun / noun earlier in the clause.
SELECT Refl + $$GENDER + $$NUMBER IF (-1* PersPron + $$GENDER + $$NUMBER BARRIER CLB);
SELECT Refl + $$GENDER + $$NUMBER IF (-1* N + $$GENDER + $$NUMBER BARRIER CLB);
#################################################################################
# Interrogative pronouns
#################################################################################
# Ég veit ekki hvort Jón læsi aldrei bókina
# Lemma "hvor": drop the interrogative-pronoun reading before a nominative
# noun or proper noun.
REMOVE ("hvor"ri) + InterrPron IF (1 N + Nom) ;
REMOVE ("hvor"ri) + InterrPron IF (1 ProperNom + Nom) ;
#################################################################################
# Possessive pronouns
#################################################################################
# (no rules yet)
#################################################################################
# Prepositions
#################################################################################
#að styðja
# "að" before an infinitive verb: choose the preposition reading.
SELECT Prep IF (0 ("að"ri)) (1 VerbInf);
#sem leggja fram kæru á hendur ríkinu.
# "á"/"Á" before an unambiguous (proper) noun that is not unambiguously genitive.
SELECT Prep IF (0 ("[Áá]"ri)) (1C N OR ProperNom) (NOT 1C Gen);
#Þeir eru undir rúminu   ("They are under the bed")
# Preposition/adverb ambiguity before an unambiguous pre-nominal element.
SELECT Prep IF (0 Prep) (0 Adv) (1C PRE-N);
#Ég talaði aldrei við Súsönnu.   ("I never talked to Súsanna")
# Drop a nominative personal pronoun when an unambiguous personal pronoun
# already precedes in the clause.
REMOVE PersPron + Nom IF (-1C* PersPron BARRIER CLB);
#################################################################################
# NPs
#################################################################################
# 15 milljörðum dollara
# um 1.950 milljörðum íslenskra króna.
# This is a hack because we don't want to do unification with 'billion' etc. perhaps there
# is a better way of doing it .
# Genitive plural noun after an unambiguous plural numeral.
SELECT (n pl gen) IF (-1C Num + (pl));
# Gender agreement by exclusion ("ufc" rules). For a participle / adjective /
# determiner reading of gender G and case $$NAGD: scan right (or left for the
# *_p variants) for a noun of one of the other two genders in the same case,
# stopping at NPNHA; if that noun has no reading of gender G, remove the
# G reading of the target.
REMOVE:ufc_f PP + Fem IF (0 $$NAGD) (*1 NomMscNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Fem);
REMOVE:ufc_m PP + Msc IF (0 $$NAGD) (*1 NomFemNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Msc);
REMOVE:ufc_nt PP + Neu IF (0 $$NAGD) (*1 NomMscFem + $$NAGD BARRIER NPNHA LINK NOT 0 Neu);
REMOVE:ufc_f Adj + Fem IF (0 $$NAGD) (*1 NomMscNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Fem);
REMOVE:ufc_m Adj + Msc IF (0 $$NAGD) (*1 NomFemNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Msc);
REMOVE:ufc_nt Adj + Neu IF (0 $$NAGD) (*1 NomMscFem + $$NAGD BARRIER NPNHA LINK NOT 0 Neu);
# Post-nominal variants: noun to the left, and no nominative of gender G to the right.
REMOVE:ufc_f_p Adj + Fem IF (0 $$NAGD) (*-1 NomMscNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Fem) (NOT 1* Nom + Fem BARRIER NPNHA);
REMOVE:ufc_m_p Adj + Msc IF (0 $$NAGD) (*-1 NomFemNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Msc) (NOT 1* Nom + Msc BARRIER NPNHA);
REMOVE:ufc_nt_p Adj + Neu IF (0 $$NAGD) (*-1 NomMscFem + $$NAGD BARRIER NPNHA LINK NOT 0 Neu) (NOT 1* Nom + Neu BARRIER NPNHA);
REMOVE:d_ufc_f Det + Fem IF (0 $$NAGD) (*1 NomMscNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Fem);
REMOVE:d_ufc_m Det + Msc IF (0 $$NAGD) (*1 NomFemNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Msc);
REMOVE:d_ufc_nt Det + Neu IF (0 $$NAGD) (*1 NomMscFem + $$NAGD BARRIER NPNHA LINK NOT 0 Neu);
# Agreement by selection: on a noun, pick the gender / case / number reading
# that unifies with the unambiguous modifier immediately to the left.
SELECT:r30 $$GENDER IF (-1C Num + $$GENDER)(0 N);
SELECT:r28 $$GENDER IF (-1C Adj + $$GENDER)(0 N);
SELECT:r28b $$GENDER IF (-1C Det + $$GENDER)(0 N);
SELECT:r28c $$GENDER IF (-1C IndefPron + $$GENDER)(0 N);
SELECT:r28d $$GENDER IF (-1C PP + $$GENDER)(0 N);
SELECT:r30 $$NAGD IF (-1C Num + $$NAGD)(0 N);
SELECT:r29 $$NAGD IF (-1C Adj + $$NAGD)(0 N);
SELECT:r29b $$NAGD IF (-1C Det + $$NAGD)(0 N);
SELECT:r29c $$NAGD IF (-1C IndefPron + $$NAGD)(0 N);
SELECT:r29d $$NAGD IF (-1C PP + $$NAGD)(0 N);
SELECT:r30 $$NUMBER IF (-1C Num + $$NUMBER)(0 N);
SELECT:r30 $$NUMBER IF (-1C Adj + $$NUMBER)(0 N);
SELECT:r30b $$NUMBER IF (-1C Det + $$NUMBER)(0 N);
SELECT:r30c $$NUMBER IF (-1C IndefPron + $$NUMBER)(0 N);
SELECT:r30d $$NUMBER IF (-1C PP + $$NUMBER)(0 N);
# enga bók   ("no book")
# Determiner/indefinite-pronoun ambiguity before an unambiguous pre-nominal element.
SELECT:r31 Det IF (1C PRE-N) (0 Det) (0 IndefPron);
#Nánast allir Íslendingar nota nú netið með ADSL.
# Determiner whose case unifies with an unambiguous cohort to the right, when
# an unambiguous noun also follows.
SELECT Det IF (0 $$NAGD) (0 Det OR Pron) (1C* N) (1C* $$NAGD);
#Hundur systur minnar   ("my sister's dog")
# Genitive between an unambiguous nominative and an unambiguous possessive pronoun.
# (The terminating ';' was missing on this rule and has been added.)
SELECT Gen IF (-1C Nom) (0 NAGD) (1C PossPron) ;
#á mati lögreglu
# Genitive after an unambiguous non-genitive noun, unless the target may be a finite verb.
SELECT Gen IF (-1C N) (-1C Nom OR Acc OR Dat) (NOT 0 VerbFin);
#Einungis þriðjungur Norðmanna vill að landið gangi í Evrópusambandið
#REMOVE Nom IF (-1C VerbFin) (0 Nom) (0 Acc) (NOT -1 DATSUBJVERB | ACCSUBJVERB);
#################################################################################
# Verbs
#################################################################################
# Imperative only directly after a clause boundary.
REMOVE (imp) (NOT -1 CLB);
#Fá þeir íbúar, sem ekki geta eða vilja snúa aftur í íbúðirnar yfir nóttina, aðstoð með gistingu ef þörf krefur.
# Sentence-initial cohort with no finite verb later in the clause: not an infinitive.
REMOVE Inf IF (-1 BOS) (NOT 1* VerbFin BARRIER CLB);
# "við að" + target: choose the infinitive.
SELECT Inf IF (-1 ("að"ri)) (-2 ("við"ri));
# Ég sé hesta   ("I see horses")
#Ennfremur sé málið mjög viðkvæmt í Hollandi.
#Tjónið gæti verið meira en 15 milljörðum dollara eða um 1.950 milljörðum íslenskra króna.
# NOTE(review): the target requires a VerbAux reading while the final condition
# (NOT 0 VerbAux) requires that the cohort has none, so as written this rule
# looks unable to fire. Confirm the intended target/condition.
REMOVE TempsIrreal + VerbAux IF (NOT -1* CS) (NOT -1 BOS) (NOT -1 Adv LINK -1 BOS) (NOT 0 VerbAux);
# Infinitive when a MODV verb is found to the left with no other verb in between.
SELECT:r51 Inf IF (*-1 MODV BARRIER Verb);
# María mun koma   ("María will come")
SELECT:r52 MODV IF (1C Inf);
# Íslendingar munu boða að þeir ætli að draga úr losun
SELECT MODV + TempsFin IF (1C Inf);
# Ég las bókina   ("I read the book")
# #á morgun í Kaupmannahöfn, að því er Ríkisútvarpið greindi frá.
# Subject-verb agreement: choose the person / number reading of a finite verb
# that unifies with an adjacent pronoun or nominative noun.
SELECT:r53 $$PERSON IF (-1 PersPron + $$PERSON)(0 VerbFin) (NOT 1 ("er"ri));
SELECT:r53a $$PERSON IF (1C PersPron + $$PERSON + Nom)(0 VerbFin) (NOT 1 ("er"ri)) (NOT 0 ("sem"ri) OR ("er"ri));
SELECT:r53b VerbFin + $$NUMBER IF (-1 N + $$NUMBER + Nom) (0 VerbFin) (NOT 1 ("er"ri)) (NOT 0 ("sem"ri));
# The modals munu and skulu can never be preceded by another auxiliary. (p.10)
REMOVE:r54 VerbAux IF (1C ("munu") OR ("skulu"));
# At most one finite verb per clause: drop a finite reading when an
# unambiguous finite verb precedes.
REMOVE:r55 VerbFin IF (-1C* VerbFin BARRIER CLB) (0 VerbFin) ;
# *NATO sendi 7.000 hermenn
# No 1st / 2nd person finite verb without a matching pronoun in the clause.
REMOVE VerbFin + (p1) IF (NOT -1* (prn p1) BARRIER CLB) (NOT 1* (prn p1) BARRIER CLB) ;
REMOVE VerbFin + (p2) IF (NOT -1* (prn p2) BARRIER CLB) (NOT 1* (prn p2) BARRIER CLB) ;
#Bandaríkin hafa beðið þær 43 þjóðir sem eru með hermenn í Afganistan að senda 10.000 hermenn
# Finite "hafa" when an unambiguous participle follows before any pre-nominal element.
SELECT ("hafa"ri) + VerbFin IF (1C* Participle BARRIER PRE-N);
#100 breskir hermenn hafa fallið í Afganistan
#Við vorum að vakna og nú tekur við fundur þar sem farið verður yfir verkefni dagsins
# Participle/supine next to an unambiguous auxiliary (either side).
SELECT Participle IF (-1C VerbAux);
SELECT Participle IF (1C VerbAux);
#Himinninn er blár   ("The sky is blue")
# Drop the relativiser reading when no finite verb is found on either side in the clause.
# NOTE(review): the 0 test is an empty regex tag (""ri); the example suggests
# it was meant to name the wordform "er". Confirm against upstream.
REMOVE Rel IF (0 (""ri)) (NOT -1* VerbFin BARRIER CLB) (NOT 1* VerbFin BARRIER CLB);
#mátt þú ekki eta
# MODV reading whose person unifies with the unambiguous pronoun that follows.
SELECT MODV + $$PERSON IF (1C PersPron + $$PERSON) (NOT 0 ("er"));
# Í gær borðuðu allir stóru strákarnir góðu súpuna.
# Participle/finite ambiguity with no other finite verb in the sentence: drop the participle.
REMOVE PP IF (NOT -1* VerbFin BARRIER BOS) (NOT 1* VerbFin BARRIER EOS) (0 VerbFin) (0 PP);
# Þetta er besti árangur minn.   ("This is my best result")
REMOVE ("er"i) + Conj IF (NOT -1* VerbFin BARRIER BOS) (NOT 1* VerbFin BARRIER EOS) (0 VerbFin) (0 Conj);
# Haítí, Port-au-Prince, er rústir einar.
# "er" after ", <noun/proper noun> ,": not a conjunction.
REMOVE ("er"i) + Conj IF (-1C Coma LINK -1 N | ProperNom LINK -1C Coma);
#Þetta er portúgalgasti maður sem ég hef séð
REMOVE Conj IF (0 ("er"i)) (0 VerbFin OR Conj) (NOT -1* VerbFin BARRIER CLB) (NOT 1* VerbFin BARRIER CLB);
# en fjölskylda hans tilkynnti ekki um hvarfið fyrr en í nóvember þar sem hún hélt að hann væri á sólarströnd við Karabíska hafið.
# Noun/verb ambiguity after an unambiguous adjective at sentence end: drop the verb.
REMOVE Verb IF (-1C Adj) (0 N) (0 Verb) (1 EOS);
# "sem" followed by a pronoun and a later finite verb that agree in person:
# "sem" is not itself the finite verb.
REMOVE VerbFin IF (0 ("sem")) (1 PersPron + $$PERSON LINK 1* VerbFin + $$PERSON) ;
#Þetta er portúgalgasti maður sem ég hef séð
##
# Specific pre-disambiguation lexical selection
#
# NOTE(review): the 0 test is again an empty regex tag (""ri); presumably a
# wordform was lost here. Confirm against upstream.
SELECT ("stig"ri) IF (0 (""ri)) (1 ("frost"ri) OR ("hiti"ri));
#################################################################################
# Syntax
#################################################################################
## Todo:
## Disambiguate co-ordinators, find objects, predicates, adverbials, just
## enough to let us write transfer rules more easily.
# Second rule section: syntactic function mapping.
SECTION
# Each list names the syntactic tag of the same spelling, so that mapped tags
# can be tested in later contexts. The arrow shows the direction of the head.
LIST @X = @X;
LIST @←ADVL = @←ADVL;
LIST @ADVL→ = @ADVL→;
LIST @←SUBJ = @←SUBJ;
LIST @SUBJ→ = @SUBJ→;
LIST @-FMAINV = @-FMAINV;
LIST @+FMAINV = @+FMAINV;
LIST @-FAUXV = @-FAUXV;
# Fixed: this list used to contain "@FAUXV" (no '+'), which is not the tag the
# MAP (@+FAUXV) rules emit, so the set could never match a mapped reading.
LIST @+FAUXV = @+FAUXV;
LIST @←N = @←N;
LIST @N→ = @N→;
LIST @CS→ = @CS→;
LIST @CNP = @CNP; # SN co-ordinator
LIST @CVP = @CVP; # SV co-ordinator
# --- Subjects: pronouns ------------------------------------------------------
# Post-verbal nominative pronoun agreeing in person and number with an
# unambiguous finite verb to the left.
MAP (@←SUBJ) TARGET PersPron + Nom + $$PERSON + $$NUMBER (-1C* VerbFin + $$PERSON + $$NUMBER BARRIER CLB) (0C PersPron) (NOT 0 PossPron);
# Pre-verbal nominative demonstrative / personal pronoun.
MAP (@SUBJ→) TARGET DemPron + Nom (1C* VerbFin BARRIER CLB) (0C DemPron) (NOT 0 PossPron);
MAP (@SUBJ→) TARGET PersPron + Nom (1C* VerbFin BARRIER CLB) (0C PersPron) (NOT 0 PossPron);
# Dative / accusative subjects of the DATSUBJVERB / ACCSUBJVERB verbs.
MAP (@SUBJ→) TARGET PersPron + Dat (1C* VerbFin + DATSUBJVERB BARRIER CLB) (0C PersPron) (NOT 0 PossPron);
MAP (@SUBJ→) TARGET PersPron + Acc (1C* VerbFin + ACCSUBJVERB BARRIER CLB) (0C PersPron) (NOT 0 PossPron);
# Sentence-final nominative pronoun with an agreeing finite verb to the left
# and no earlier unambiguous nominative pronoun.
MAP (@←SUBJ) TARGET PersPron + Nom + $$PERSON + $$NUMBER (-1C* VerbFin BARRIER CLB) (-1* VerbFin + $$PERSON + $$NUMBER BARRIER CLB) (0 PersPron + Nom) (NOT 0 PossPron) (NOT -1C* PersPron + Nom) (1 EOS);
MAP (@←SUBJ) TARGET PersPron + Dat (-1C* VerbFin + DATSUBJVERB BARRIER CLB) (0C PersPron) (NOT 0 PossPron);
MAP (@←SUBJ) TARGET PersPron + Acc (-1C* VerbFin + ACCSUBJVERB BARRIER CLB) (0C PersPron) (NOT 0 PossPron);
# --- Subjects: nouns and proper nouns ----------------------------------------
#MAP (@←SUBJ) TARGET N + Nom + $$NUMBER (-1C* VerbFin + $$NUMBER + (p3) BARRIER CLB) (0 N + Nom + $$NUMBER);
# Post-verbal nominative noun agreeing in number with a 3rd-person finite
# verb, if no subject has been mapped earlier in the clause.
MAP (@←SUBJ) TARGET N + Nom + $$NUMBER (-1* VerbFin + $$NUMBER + (p3) BARRIER CLB)
(0 N + Nom + $$NUMBER)
(NOT -1* @←SUBJ OR @SUBJ→ BARRIER CLB);
MAP (@SUBJ→) TARGET N + Nom (1C VerbFin) (0 N + Nom);
MAP (@SUBJ→) TARGET N + Nom (1C* VerbFin BARRIER CLB) (0 N + Nom);
MAP (@SUBJ→) TARGET ProperNom + Nom (1 VerbFin) (0 ProperNom + Nom) (NOT -1* Nom BARRIER CLB) (NOT 1C* Nom BARRIER CLB);
MAP (@SUBJ→) TARGET ProperNom + Nom (1C VerbFin) (0C ProperNom + Nom);
MAP (@←SUBJ) TARGET ProperNom + Nom (-1C VerbFin) (0C ProperNom + Nom) (NOT -1C* @SUBJ→ BARRIER CLB);
# --- Objects -------------------------------------------------------------------
#Litlu músina hafði stóra uglan étið í gær
# Accusative noun adjacent to an unambiguous finite verb.
MAP (@OBJ→) TARGET N + Acc (1C VerbFin) (0C N + Acc);
MAP (@←OBJ) TARGET N + Acc (-1C VerbFin) (0C N + Acc);
MAP (@←OBJ) TARGET N + Acc (-1C VerbFin LINK -1C @SUBJ→) (0 N + Acc);
# --- Finite auxiliaries --------------------------------------------------------
# Check these 'auxv' with pp/supn and 'modv' with 'inf' ?
# The only finite auxiliary / modal of its clause, followed by an unambiguous
# non-finite verb form before a clause boundary or pre-nominal element.
MAP (@+FAUXV) TARGET VerbAux + TempsFin (NOT 1C* VerbAux + TempsFin BARRIER CLB)
(NOT -1C* VerbAux + TempsFin BARRIER CLB)
(0C VerbAux + TempsFin)
(1C* Participle OR Inf OR VerbSupine BARRIER CLB | PRE-N);
MAP (@+FAUXV) TARGET MODV + TempsFin (NOT 1C* MODV + TempsFin BARRIER CLB)
(NOT -1C* MODV + TempsFin BARRIER CLB | Conj)
(0C MODV + TempsFin)
(1C* Participle OR Inf OR VerbSupine BARRIER CLB | PRE-N);
# --- Finite main verbs ---------------------------------------------------------
# Finite verb agreeing with an already-mapped subject; the last rule is a
# fallback when no finite main verb has been mapped in the clause.
MAP (@+FMAINV) TARGET VerbFin + (p3) + $$NUMBER (1* @←SUBJ + $$NUMBER BARRIER CLB) (0 VerbFin LINK -1 Adv);
MAP (@+FMAINV) TARGET VerbFin + (p3) + $$NUMBER (-1* @SUBJ→ + $$NUMBER BARRIER CLB);
MAP (@+FMAINV) TARGET VerbFin + $$PERSON + $$NUMBER (-1C* @SUBJ→ + $$PERSON + $$NUMBER BARRIER CLB);
MAP (@+FMAINV) TARGET VerbFin + $$PERSON + $$NUMBER (1C* @←SUBJ + $$PERSON + $$NUMBER BARRIER CLB);
MAP (@+FMAINV) TARGET VerbFin + $$PERSON + $$NUMBER (NOT 1* @+FMAINV BARRIER CLB) (NOT -1* @+FMAINV BARRIER CLB);
# --- Non-finite main verbs -----------------------------------------------------
# sem leikað var ...
MAP (@-FMAINV) TARGET VerbSupine | Participle (-1 Rel) (1C VerbAux + TempsFin);
MAP (@-FMAINV) TARGET VerbSupine | Participle (-1C* VerbAux + TempsFin BARRIER CLB);
# Hann er að læra. Most taggers mark "að" as infinitive. Make sure that only the verb gets the FMAINV marker
# NOTE(review): the barrier here is "CLB + PRE-N" (a cohort matching both),
# whereas the @+FAUXV rules above use "CLB | PRE-N"; confirm which is intended.
# NOTE(review): the final test is on an empty tag (""); given the comment it
# presumably named the wordform "að". Confirm against upstream.
MAP (@-FMAINV) TARGET Inf (-1C* MODV + TempsFin BARRIER CLB + PRE-N) (NOT 0C (""));
# --- Noun modifiers ------------------------------------------------------------
# Post-nominal adjective agreeing in case and number with the preceding noun.
MAP (@←N) TARGET Adj + $$NAGD + $$NUMBER (0C Adj) (-1C N + $$NAGD + $$NUMBER);
SELECT Adj + @←N (0C Adj) (NOT 1* N + $$NAGD + $$NUMBER) (-1C N + $$NAGD + $$NUMBER); ## ugly hack.
# Pre-nominal numerals and determiners.
MAP (@N→) TARGET Num + Gen (0C Num) (1C N + Gen);
MAP (@N→) TARGET Num (0C Num) (1C* N BARRIER CLB | VerbFin);
MAP (@N→) TARGET (det) (0C (det)) (1C N);
# Post-nominal genitive numerals.
MAP (@←N) TARGET Num + Gen (0C Num + $$NUMBER) (-1C N + $$NUMBER);
MAP (@←N) TARGET Num + Gen (0C Num) (-1C N + Gen);
# --- Subordinators and adverbials ----------------------------------------------
MAP (@CS→) TARGET (cnjsub) (0C (cnjsub));
# Adverb attaches to a mapped main verb to its right / left within the clause.
MAP (@ADVL→) TARGET Adv (1* @+FMAINV | @-FMAINV BARRIER CLB) ;
MAP (@←ADVL) TARGET Adv (-1* @+FMAINV | @-FMAINV BARRIER CLB) ;
# Rules that run once, after the SECTION blocks have finished.
AFTER-SECTIONS
#Okkur fannst hún grunsamleg er hún kom hingað fyrst og keypti sprautur og nálar.
# Keep the pre-verbal subject reading directly before an unambiguous finite main verb.
SELECT @SUBJ→ IF (1C @+FMAINV);
# Fallback: map @X onto finite-verb, noun, proper-noun and determiner readings.
MAP (@X) TARGET VerbFin;
MAP (@X) TARGET N;
MAP (@X) TARGET ProperNom;
MAP (@X) TARGET Det;
#REMOVE (@X);
# ganga# í vs. ganga+í
#SUBSTITUTE (pr @X) (pr) Verb (0 (+í));
#SUBSTITUTE (pr @+FMAINV) (pr) Verb (0 (+í));
#SUBSTITUTE (pr @-FMAINV) (pr) Verb (0 (+í));
#
#SUBSTITUTE (sg pr) (sg) Verb;
#SUBSTITUTE (+í) (+í pr) Verb (0 (+í));