Short description:
Usage:
psql tt< pgweb.sql
-- Creates configuration 'pg' - default, should match server's locale !!! -- Change 'ru_RU.UTF-8' begin; -- create special (default) configuration 'pg' update pg_ts_cfg set locale=NULL where locale = 'ru_RU.UTF-8'; insert into pg_ts_cfg values('pg','default','ru_RU.UTF8'); -- register 'pg_dict' dictionary using synonym template -- postgres pg -- pgsql pg -- postgresql pg insert into pg_ts_dict (select 'pg_dict',dict_init, '/usr/local/pgsql-dev/share/contrib/pg_dict.txt', dict_lexize, 'pg-specific dictionary' from pg_ts_dict where dict_name='synonym' ); -- register ispell dictionary, check paths and stop words -- I used iconv for english files, since there are some cyrillic stuff insert into pg_ts_dict (SELECT 'en_ispell', dict_init, 'DictFile="/usr/local/share/dicts/ispell/utf8/english-utf8.dict",' 'AffFile="/usr/local/share/dicts/ispell/utf8/english-utf8.aff",' 'StopFile="/usr/local/share/dicts/ispell/utf8/english-utf8.stop"', dict_lexize FROM pg_ts_dict WHERE dict_name = 'ispell_template' ); -- use the same stop-word list as 'en_ispell' dictionary UPDATE pg_ts_dict set dict_initoption='/usr/local/share/dicts/english.stop' where dict_name='en_stem'; -- default token<->dicts mappings insert into pg_ts_cfgmap select 'pg', tok_alias, dict_name from public.pg_ts_cf gmap where ts_name='default'; -- modify mappings for latin words for configuration 'pg' update pg_ts_cfgmap set dict_name = '{pg_dict,en_ispell,en_stem}' where tok_alias in ( 'lword', 'lhword', 'lpart_hword' ) and ts_name = 'pg'; -- we won't index/search some tokens update pg_ts_cfgmap set dict_name = NULL where tok_alias in ('email', 'url', 'sfloat', 'uri', 'float') and ts_name = 'pg'; -- testing select * from ts_debug(' PostgreSQL, the highly scalable, SQL compliant, open source object-relational database management system, is now undergoing beta testing of the next version of our software: PostgreSQL 8.2. '); end;
You shoud get something like:
ts_name | tok_type | description | token | dict_name | tsvector ---------+-------------+-------------------------------+-------------------+-----------------------------+------------------------------------ pg | lword | Latin word | PostgreSQL | {pg_dict,en_ispell,en_stem} | 'pg' pg | lword | Latin word | the | {pg_dict,en_ispell,en_stem} | pg | lword | Latin word | highly | {pg_dict,en_ispell,en_stem} | 'high' pg | lword | Latin word | scalable | {pg_dict,en_ispell,en_stem} | 'scalable' pg | lword | Latin word | SQL | {pg_dict,en_ispell,en_stem} | 'sql' pg | lword | Latin word | compliant | {pg_dict,en_ispell,en_stem} | 'compliant' pg | lword | Latin word | open | {pg_dict,en_ispell,en_stem} | 'open' pg | lword | Latin word | source | {pg_dict,en_ispell,en_stem} | 'source' pg | lhword | Latin hyphenated word | object-relational | {pg_dict,en_ispell,en_stem} | 'object' 'object-rel' 'relational' pg | lpart_hword | Latin part of hyphenated word | object | {pg_dict,en_ispell,en_stem} | 'object' pg | lpart_hword | Latin part of hyphenated word | relational | {pg_dict,en_ispell,en_stem} | 'relational' pg | lword | Latin word | database | {pg_dict,en_ispell,en_stem} | 'database' pg | lword | Latin word | management | {pg_dict,en_ispell,en_stem} | 'management' pg | lword | Latin word | system | {pg_dict,en_ispell,en_stem} | 'system' pg | lword | Latin word | is | {pg_dict,en_ispell,en_stem} | pg | lword | Latin word | now | {pg_dict,en_ispell,en_stem} | 'now' pg | lword | Latin word | undergoing | {pg_dict,en_ispell,en_stem} | 'undergo' pg | lword | Latin word | beta | {pg_dict,en_ispell,en_stem} | 'beta' pg | lword | Latin word | testing | {pg_dict,en_ispell,en_stem} | 'test' pg | lword | Latin word | of | {pg_dict,en_ispell,en_stem} | pg | lword | Latin word | the | {pg_dict,en_ispell,en_stem} | pg | lword | Latin word | next | {pg_dict,en_ispell,en_stem} | 'next' pg | lword | Latin word | version | {pg_dict,en_ispell,en_stem} | 'verse' pg | lword | Latin word | of | {pg_dict,en_ispell,en_stem} | pg | lword | Latin word | our | {pg_dict,en_ispell,en_stem} | pg | lword | Latin word | software | {pg_dict,en_ispell,en_stem} | 'software' pg | lword | Latin word | PostgreSQL | {pg_dict,en_ispell,en_stem} | 'pg' pg | float | Decimal notation | 8.2 | | (28 rows)