Edinburgh Speech Tools  2.4-release
EST_relation_aux.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor and Simon King */
34 /* Date : June 1995 */
35 /*-----------------------------------------------------------------------*/
36 /* Relation class auxiliary routines */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <iostream>
41 #include <fstream>
42 #include <cmath>
43 #include "EST_types.h"
44 #include "ling_class/EST_Relation.h"
45 #include "ling_class/EST_relation_aux.h"
46 #include "EST_string_aux.h"
47 #include "EST_io_aux.h"
48 #include "EST_Option.h"
49 #include "EST_Token.h"
50 
51 static int is_in_class(const EST_String &name, EST_StrList &s);
52 
53 bool dp_match(const EST_Relation &lexical,
54  const EST_Relation &surface,
55  EST_Relation &match,
56  float ins, float del, float sub);
57 
58 
59 float start(EST_Item *n)
60 {
61  return (iprev(n) == 0) ? 0.0 : iprev(n)->F("end");
62 }
63 
64 float duration(EST_Item *n)
65 {
66  return n->F("end") - start(n);
67 }
68 
69 void quantize(EST_Relation &a, float q)
70 {
71  EST_Item *a_ptr;
72  float end;
73 
74  for (a_ptr = a.head(); a_ptr != 0; a_ptr = inext(a_ptr))
75  {
76  end = a_ptr->F("end") / q;
77  end = rint(end);
78  end = end * q;
79  a_ptr->set("end", end);
80  }
81 }
82 
83 // edit labels using a sed file to do the editing
84 
85 int edit_labels(EST_Relation &a, EST_String sedfile)
86 {
87  EST_Item *a_ptr;
88  char command[100], name[100], newname[100], sf[100];
89  FILE *fp;
90  strcpy(sf, sedfile);
91  EST_String file1, file2;
92  file1 = make_tmp_filename();
93  file2 = make_tmp_filename();
94 
95  fp = fopen(file1, "wb");
96  if (fp == NULL)
97  {
98  fprintf(stderr,"edit_labels: cannot open \"%s\" for writing\n",
99  (const char *)file1);
100  return -1;
101  }
102  for (a_ptr = a.head(); a_ptr != 0; a_ptr = inext(a_ptr))
103  {
104  strcpy(name, a_ptr->name());
105  fprintf(fp, "%s\n", name);
106  }
107  fclose(fp);
108  strcpy(command, "cat ");
109  strcat(command, file1);
110  strcat(command, " | sed -f ");
111  strcat(command, sedfile);
112  strcat(command, " > ");
113  strcat(command, file2);
114 
115  printf("command: %s\n", command);
116  system(command);
117 
118  fp = fopen(file2, "rb");
119  if (fp == NULL)
120  {
121  fprintf(stderr,"edit_labels: cannot open \"%s\" for reading\n",
122  (const char *)file2);
123  return -1;
124  }
125  for (a_ptr = a.head(); a_ptr != 0; a_ptr = inext(a_ptr))
126  {
127  fscanf(fp, "%s", newname);
128 // cout << "oldname: " << a_ptr->name() << " newname: " << newname << endl;
129  a_ptr->set_name(newname);
130  }
131  fclose(fp);
132  return 0;
133 }
134 
135 // make new EST_Relation from start and end points.
136 void extract(const EST_Relation &orig, float s,
137  float e, EST_Relation &ex)
138 {
139  EST_Item *a;
140  EST_Item *tmp;
141 
142  for (a = orig.head(); a != 0; a = inext(a))
143  if ((a->F("end") > s) && (start(a) < e))
144  {
145  tmp = ex.append(a);
146  if ((a->F("end") > e))
147  tmp->set("end", e);
148  }
149 }
150 
151 void merge_all_label(EST_Relation &seg, const EST_String &labtype)
152 {
153  EST_Item *a_ptr, *n_ptr;
154  (void)labtype; // unused parameter
155 
156  for (a_ptr = seg.head(); a_ptr != seg.tail(); a_ptr = n_ptr)
157  {
158  n_ptr = inext(a_ptr);
159  if (a_ptr->name() == inext(a_ptr)->name())
160  seg.remove_item(a_ptr);
161  }
162 }
163 
164 void change_label(EST_Relation &seg, const EST_String &oname,
165  const EST_String &nname)
166 {
167  EST_Item *a_ptr;
168 
169  for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
170  if (a_ptr->name() == oname)
171  a_ptr->set_name(nname);
172 }
173 
174 void change_label(EST_Relation &seg, const EST_StrList &oname,
175  const EST_String &nname)
176 {
177  EST_Item *a_ptr;
178  EST_Litem *p;
179 
180  for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
181  for (p = oname.head(); p ; p = p->next())
182  if (a_ptr->name() == oname(p))
183  a_ptr->set_name(nname);
184 }
185 
186 static int is_in_class(const EST_String &name, EST_StrList &s)
187 {
188  EST_Litem *p;
189 
190  for (p = s.head(); p; p = p->next())
191  if (name == s(p))
192  return TRUE;
193 
194  return FALSE;
195 }
196 
197 int check_vocab(EST_Relation &a, EST_StrList &vocab)
198 {
199  EST_Item *s;
200  for (s = a.head(); s; s = inext(s))
201  if (!is_in_class(s->name(), vocab))
202  {
203  cerr<<"Illegal entry in file " <<a.name()<< ":\"" << *s << "\"\n";
204  return -1;
205  }
206  return 0;
207 }
208 
209 void convert_to_broad_class(EST_Relation &seg, const EST_String &class_type,
210  EST_Option &options)
211 {
212  // class_type contains a list of whitepsace separated segment names.
213  // This function looks at each segment and adds a feature "pos"
214  // if its name is contained in the list.
215  EST_String tmp_class_type = class_type + "_list";
216  EST_String bc_list(options.val(tmp_class_type, 1));
217  EST_StrList pos_list;
218  EST_TokenStream ts;
219 
220  ts.open_string(bc_list);
221  while (!ts.eof())
222  pos_list.append(ts.get().string());
223 
224  convert_to_broad(seg, pos_list);
225 }
226 
227 void convert_to_broad(EST_Relation &seg, EST_StrList &pos_list,
228  EST_String broad_name, int polarity)
229 {
230  EST_Item *a_ptr;
231  if (broad_name == "")
232  broad_name = "pos";
233 
234  for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
235  if (is_in_class(a_ptr->name(), pos_list))
236  a_ptr->set(broad_name, (polarity) ? 1 : 0);
237  else
238  a_ptr->set(broad_name, (polarity) ? 0 : 1);
239 }
240 
241 void label_map(EST_Relation &seg, EST_Option &map)
242 {
243  EST_Item *p;
244 
245  for (p = seg.head(); p != 0; p = inext(p))
246  {
247  if (map.present(p->name()))
248  {
249  if (map.val(p->name()) == "!DELETE")
250  seg.remove_item(p);
251  else
252  p->set_name(map.val(p->name()));
253  }
254 
255  }
256 }
257 
258 void shift_label(EST_Relation &seg, float shift)
259 {
260  //shift every end time by adding x seconds.
261  EST_Item *a_ptr;
262 
263  for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
264  a_ptr->set("end", a_ptr->F("end") + shift);
265 }
266 
267 void RelationList_select(EST_RelationList &mlf, EST_StrList filenames, bool
268  exact_match)
269 {
270  // select only files in 'filenames'
271  // remove all others from mlf
272  EST_Litem *fptr, *ptr;
273  bool flag;
274 
275  // if not exact match, only match basenames
276  EST_StrList tmp_filenames;
277  for (ptr = filenames.head(); ptr != NULL; ptr = ptr->next())
278  if(exact_match)
279  tmp_filenames.append( filenames(ptr) );
280  else
281  tmp_filenames.append( basename(filenames(ptr)) );
282 
283  for(fptr=mlf.head(); fptr != NULL;)
284  {
285  flag=false;
286  for (ptr = tmp_filenames.head(); ptr != NULL; ptr = ptr->next())
287  if(exact_match)
288  {
289  if(tmp_filenames(ptr) == mlf(fptr).name())
290  {
291  flag=true;
292  break;
293  }
294  }
295  else if(mlf(fptr).name().contains(tmp_filenames(ptr)))
296  {
297  flag=true;
298  break;
299  }
300 
301  if(!flag)
302  {
303  fptr = mlf.remove(fptr);
304 
305  if(fptr==0) // must have removed head of list
306  fptr=mlf.head();
307  else
308  fptr=fptr->next();
309  }
310  else
311  fptr=fptr->next();
312  }
313  tmp_filenames.clear();
314 }
315 
316 // look for a single file called "filename" and make a EST_Relation out of
317 // this
318 EST_Relation RelationList_extract(EST_RelationList &mlf, const EST_String &filename, bool base)
319 {
320 
321  EST_Litem *p;
322  EST_String test, ref;
323 
324  if (base)
325  for (p = mlf.head(); p; p = p->next())
326  {
327  if (basename(mlf(p).name(), "*")==basename(filename, "*"))
328  return mlf(p);
329  }
330  else
331  for (p = mlf.head(); p; p = p->next())
332  {
333  if (basename(mlf(p).name()) == filename)
334  return mlf(p);
335  }
336 
337  cerr << "No match for file " << filename << " found in mlf\n";
338  EST_Relation d;
339  return d;
340 }
341 
342 // combine all relation in MLF into a single relation.
343 EST_Relation RelationList_combine(EST_RelationList &mlf)
344 {
345  EST_Litem *p;
346  EST_Relation all;
347  EST_Item *s, *t = 0;
348  float last = 0.0;
349 
350  for (p = mlf.head(); p; p = p->next())
351  {
352  for (s = mlf(p).head(); s; s = inext(s))
353  {
354  t = all.append();
355  t->set("name", s->S("name"));
356  t->set("end", s->F("end") + last);
357  cout << "appended t " << t << endl;
358  }
359  last = (t != 0) ? t->F("end") : 0.0;
360  }
361  return all;
362 }
363 
364 EST_Relation RelationList_combine(EST_RelationList &mlf, EST_Relation &key)
365 {
366  EST_Litem *p;
367  EST_Relation all;
368  EST_Item *s, *t = 0, *k;
369  float st;
370 
371  if (key.length() != mlf.length())
372  {
373  cerr << "RelationList has " << mlf.length() << " elements: expected "
374  << key.length() << " from key file\n";
375  return all;
376  }
377 
378  for (k = key.head(), p = mlf.head(); p; p = p->next(), k = inext(k))
379  {
380  st = start(k);
381  for (s = mlf(p).head(); s; s = inext(s))
382  {
383  t = all.append();
384  t->set("name", s->S("name"));
385  t->set("end", (s->F("end") + st));
386  }
387  }
388  return all;
389 }
390 
391 int relation_divide(EST_RelationList &slist, EST_Relation &lab,
392  EST_Relation &keylab,
393  EST_StrList &blank, EST_String ext)
394 { // divides a single relation into multiple chunks according to the
395  // keylab relation. If the keylab boundary falls in the middle of a label,
396  // the label is assigned to the chunk which has the most overlap with
397  // it. Some labels may be specified in the "blank" list which means thy
398  // are duplicated across boundaries.
399 
400  EST_Relation a, newkey;
401  EST_Item *s, *k, *t = 0, *n;
402  EST_String filename;
403  float kstart;
404 
405  slist.clear();
406 
407  if ((keylab.tail())->F("end") < (lab.tail())->F("end"))
408  {
409  cerr << "Key file must extend beyond end of label file\n";
410  return -1;
411  }
412 
413  // find a the first keylab that will make a non-empty file
414  for (k = keylab.head(); k ; k = inext(k))
415  if (k->F("end") > lab.head()->F("end"))
416  break;
417 
418  filename = (EST_String)k->f("file");
419  a.f.set("name", (filename + ext));
420  kstart = 0.0;
421 
422  for (s = lab.head(); s; s = inext(s))
423  {
424  n = inext(s);
425  if (n == 0)
426  {
427  t = a.append(s);
428  t->set("end", (s->F("end") - kstart));
429  break;
430  }
431  if (n->F("end") > k->F("end"))
432  {
433  if (((n->F("end") - k->F("end")) <
434  (k->F("end") - start(n))) ||
435  is_in_class(n->name(), blank))
436  {
437  a.append(s);
438  t->set("end", (s->F("end") - kstart));
439 
440  t = a.append(n);
441  t->set("end", (k->F("end") - kstart));
442 
443  if (!is_in_class(n->name(), blank))
444  s = inext(s);
445  }
446  else
447  {
448  t = a.append(s);
449  t->set("end", (k->F("end") - kstart));
450  }
451 
452  slist.append(a);
453  k = inext(k);
454  kstart = start(k);
455  a.clear();
456  filename = (EST_String)k->f("file");
457  a.f.set("name", (filename + ext));
458  }
459  else
460  {
461  t = a.append(s);
462  t->set("end", (s->F("end") - kstart));
463  }
464  }
465  slist.append(a);
466 
467  return 0;
468 }
469 
470 int relation_divide2(EST_RelationList &mlf, EST_Relation &lab,
471  EST_Relation &keylab, EST_String ext)
472 {
473  EST_Relation a, newkey;
474  EST_Item *s, *k, *t;
475  float kstart;
476 
477  mlf.clear();
478 
479  if ((keylab.tail())->F("end") < (lab.tail())->F("end"))
480  {
481  cerr << "Key file must extend beyond end of label file\n";
482  return -1;
483  }
484 
485  k = keylab.head();
486  a.f.set("name", (k->name() + ext));
487  kstart = 0.0;
488 
489  for (s = lab.head(); s; s = inext(s))
490  {
491  t = a.append();
492  t->set_name(s->name());
493  t->set("end", (s->F("end") - kstart));
494 
495  if (s->F("end") > k->F("end"))
496  {
497  cout << "appending " << a;
498  mlf.append(a);
499 
500  kstart = s->F("end");
501  k->set("end", (s->F("end")));
502  k = inext(k);
503  a.clear();
504  a.f.set("name", (k->name() + ext));
505  }
506  }
507  cout << "appending " << a;
508  mlf.append(a);
509 
510  return 0;
511 }
512 
513 
514 
515 
516 void map_match_times(EST_Relation &target, const EST_String &match_name,
517  const EST_String &time_name, bool do_start)
518 {
519  EST_Item *s, *t, *p;
520  float prev_end, inc, first_end, last_end;
521  int i;
522 
523  // first pass, copy times as appropriate, and find first
524  // and last defined ends
525  // This is hacky and certainly won't work for many cases
526 
527  first_end = -1.0;
528  prev_end = 0.0;
529  last_end = 0.0;
530 
531 // cout << "surface: " << surface << endl;
532 
533  for (s = target.head(); s; s = inext(s))
534  {
535  if ((t = daughter1(s->as_relation(match_name))) != 0)
536  {
537  s->set(time_name + "end", t->F("end"));
538  if (do_start)
539  s->set(time_name + "start", t->F("start"));
540 
541  last_end = t->F("end");
542  if (first_end < 0.0)
543  first_end = t->F("end");
544  }
545  }
546 
547  if (!target.head()->f_present(time_name + "end"))
548  {
549  target.head()->set(time_name + "end", first_end / 2.0);
550  if (do_start)
551  target.head()->set(time_name + "start", 0.0);
552  }
553 
554  if (!target.tail()->f_present(time_name + "end"))
555  {
556  target.tail()->set(time_name + "end", last_end + 0.01);
557  if (do_start)
558  target.tail()->set(time_name + "start", last_end);
559  }
560 
561  for (s = target.head(); s; s = inext(s))
562  {
563  if (!s->f_present(time_name + "end"))
564  {
565 // cout << "missing end feature for " << *s << endl;
566  for (i = 1, p = s; p; p = inext(p), ++i)
567  if (p->f_present(time_name + "end"))
568  break;
569  inc = (p->F(time_name + "end") - prev_end) / ((float) i);
570 // cout << "inc is : " << inc << endl;
571 
572 // cout << "stop phone is " << *p << endl;
573 
574  for (i = 1; s !=p ; s = inext(s), ++i)
575  {
576  s->set(time_name + "end", (prev_end + ((float) i * inc)));
577  if (do_start)
578  s->set(time_name + "start", (prev_end+((float) (i - 1 )* inc)));
579  }
580  }
581  prev_end = s->F("end");
582  }
583 }
584 
585 void dp_time_align(EST_Utterance &utt, const EST_String &source_name,
586  const EST_String &target_name,
587  const EST_String &time_name,
588  bool do_start)
589 {
590  utt.create_relation("Match");
591 
592  dp_match(*utt.relation(target_name), *utt.relation(source_name),
593  *utt.relation("Match"), 7.0, 7.0, 7.0);
594 
595  map_match_times(*utt.relation(target_name), "Match", time_name, do_start);
596 }
597 
598 
599 EST_Litem *RelationList_ptr_extract(EST_RelationList &mlf, const EST_String &filename, bool base)
600 {
601  EST_Litem *p;
602  EST_String test, ref;
603 
604  if (base)
605  for (p = mlf.head(); p; p = p->next())
606  {
607  if (basename(mlf(p).name(), "*")==basename(filename, "*"))
608  return p;
609  }
610  else
611  for (p = mlf.head(); p; p = p->next())
612  if (mlf(p).name() == filename)
613  return p;
614 
615  cerr << "No match for file " << filename << " found in mlf\n";
616  return 0;
617 }
618 
619 void relation_convert(EST_Relation &lab, EST_Option &al, EST_Option &op)
620 {
621  if (al.present("-shift"))
622  shift_label(lab, al.fval("-shift"));
623 
624  // fix option later.
625  if (al.present("-extend"))
626  al.override_fval("-length",
627  al.fval("-extend",0) * lab.tail()->F("end"));
628 
629  // quantize (ie round up or down) label times
630  if (al.present("-q"))
631  quantize(lab, al.fval("-q"));
632 
633  if (al.present("-start"))
634  {
635  if (!al.present("-end"))
636  cerr << "-start option must be used with -end option\n";
637  else
638  extract(lab, al.fval("-start"), al.fval("-end"), lab);
639  }
640 
641  if (al.present("-class"))
642  convert_to_broad_class(lab, al.val("-class"), op);
643 
644  else if (al.present("-pos"))
645  {
646  EST_StrList bclass;
647  StringtoStrList(al.val("-lablist"), bclass);
648  convert_to_broad(lab, bclass);
649  }
650  else if (al.present("-sed"))
651  edit_labels(lab, al.val("-sed"));
652  else if (al.present("-map"))
653  {
654  EST_Option map;
655  if (map.load(al.val("-map")) != format_ok)
656  return;
657  label_map(lab, map);
658  }
659 }
660 
661 
662 
663 void print_relation_features(EST_Relation &stream)
664 {
665  EST_Item *s;
667 
668  for (s = stream.head(); s; s = inext(s))
669  {
670  cout << s->name() << "\t:";
671  for(p.begin(s->features()); p; ++p)
672  cout << p->k << " "
673  << p->v << "; ";
674  cout << endl;
675  }
676 
677 }
678 
679 
680 void build_RelationList_hash_table(EST_RelationList &mlf,
681  EST_hashedRelationList &hash_table,
682  const bool base)
683 {
684  EST_Litem *p;
685  if (base)
686  for (p = mlf.head(); p; p = p->next())
687  hash_table.add_item(basename(mlf(p).name(), "*"),
688  &(mlf(p)));
689  else
690  for (p = mlf.head(); p; p = p->next())
691  hash_table.add_item(mlf(p).name(),
692  &(mlf(p)));
693 }
694 
695 
696 bool hashed_RelationList_extract(EST_Relation* &rel,
697  const EST_hashedRelationList &hash_table,
698  const EST_String &filename, bool base)
699 {
700  EST_Relation *d;
701  EST_String fname = filename;
702  int found;
703 
704  if (base)
705  fname=basename(filename, "*");
706 
707  d=hash_table.val(fname,found);
708 
709  if(found)
710  {
711  rel = d;
712  return true;
713  }
714  cerr << "No match for file " << fname << " found in mlf\n";
715  return false;
716 }
717 
718 
EST_Relation * relation(const char *name, int err_on_not_found=1) const
get relation by name
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:486
void remove_item(EST_Item *item)
EST_Relation * create_relation(const EST_String &relname)
create a new relation called <parameter>n</parameter>.
int override_fval(const EST_String rkey, const float rval)
add to end of list or overwrite. If rval is empty, do nothing
Definition: EST_Option.cc:56
void set(const EST_String &name, int ival)
Definition: EST_Features.h:185
V & val(const K &key, int &found) const
Definition: EST_THash.cc:114
int length() const
int open_string(const EST_String &newbuffer)
open a {EST_TokenStream} for string rather than a file
Definition: EST_Token.cc:251
int eof()
end of file
Definition: EST_Token.h:356
EST_Item * as_relation(const char *relname) const
View item from another relation (const char *) method.
Definition: EST_Item.h:302
void set(const EST_String &name, int ival)
Definition: EST_Item.h:179
EST_read_status load(const EST_String &filename, const EST_String &comment=";")
Definition: EST_Option.cc:138
EST_Item * tail() const
Definition: EST_Relation.h:131
const EST_String S(const EST_String &name) const
Definition: EST_Item.h:143
const float F(const EST_String &name) const
Definition: EST_Item.h:134
float fval(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:98
const int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
EST_Item * head() const
Definition: EST_Relation.h:125
const EST_String & name() const
Definition: EST_Relation.h:122
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:191
int add_item(const K &key, const V &value, int no_search=0)
Add an entry to the table.
Definition: EST_THash.cc:167
EST_Features f
Definition: EST_Relation.h:103
void begin(const Container &over)
Set the iterator ready to run over this container.
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
int f_present(const EST_String &name) const
Definition: EST_Item.h:230
void clear(void)
remove all items in list
Definition: EST_TList.h:239
EST_Litem * remove(EST_Litem *ptr)
Definition: EST_TList.h:175