The OpenNET Project / Index page

[ новости /+++ | форум | теги | ]

xemacs russian syntax and case tables (rus xemacs encoding example)


<< Предыдущая ИНДЕКС Поиск в статьях src Установить закладку Перейти на закладку Следующая >>
Ключевые слова: rus, xemacs, encoding, example (найти похожие документы)
_ RU.UNIX (2:5077/15.22) ____________________________________________ RU.UNIX _
From : Boris Tobotras 2:5020/510 19 Dec 99 13:24:08
Subj : xemacs russian syntax and case tables
_______________________________________________________________________________
>>>>> "Serge" == Serge Matveev writes:

Serge> Да, если кто не понял, речь идет о русской "в". Она же, похоже,
Serge> влияет и на удаление предложений (M-k) - не всегда то, что я
Serge> хочу. Очень обломно, однако :-((

Да понял кто, понял. Русская "в" относится к числу немногих букв, не
являющихся буквами 8859-1 в кодировке КОИ-8 ;) Hадо вправить emacs'у
syntax table, делов-то.

Во. Hашел. Авторство -- Ильи Перминова.

rus-syntax.el:

;;;; Russian syntax and case tables.

(require 'cl)                           ; for `mapcar*'
(require 'rus-encodings)

(defun rus-copy-case-table (case-table)
  "Return a copy of CASE-TABLE that can be modified freely.
When CASE-TABLE is a list, only its first element is copied and the
other three slots of the new list are nil.  Otherwise the copy is
made by `copy-case-table'."
  (if (listp case-table)
      ;; Old Emacses and XEmacs.
      (list (copy-sequence (car case-table)) nil nil nil)
    (copy-case-table case-table)))

(defun case-table-aset (ct x y)
  "Store Y at index X of the case table CT.
When CT is a list, the store goes into its first element."
  (if (listp ct)
      (setq ct (car ct)))
  (aset ct x y))

(defun rus-syntax-table ()
  "Set syntax and case tables for the current buffer according to
the encoding of russian letters in the buffer.
The encoding must be in variable RUS-BUFFER-ENCODING.

Every letter of the encoding gets word syntax, and the case table
maps every letter, small or capital, to its small form.  Both tables
are modified copies of the buffer's current ones.

Signal an error when RUS-BUFFER-ENCODING has no definition."
  (let ((e (rus-encoding rus-buffer-encoding)))
    ;; Without a definition `substring' below would fail with an
    ;; unhelpful type error; report the real cause instead, before any
    ;; table has been touched.
    (if (null e)
        (error "Unknown encoding of russian letters: %s"
               rus-buffer-encoding))
    (let* ((ct (rus-copy-case-table (current-case-table)))
           (st (copy-syntax-table (syntax-table)))
           ;; First half of the definition: small letters;
           ;; second half: capital letters in the same order.
           (lc-chars (substring e 0 (/ (length e) 2)))
           (uc-chars (substring e (/ (length e) 2))))
      (mapcar (function (lambda (x) (modify-syntax-entry x "w" st)))
              e)
      ;; Small letters map to themselves ...
      (mapcar* (function (lambda (x y) (case-table-aset ct x y)))
               lc-chars lc-chars)
      ;; ... and capital letters map to their small counterparts.
      (mapcar* (function (lambda (x y) (case-table-aset ct x y)))
               uc-chars lc-chars)
      (set-syntax-table st)
      (set-case-table ct))))

(add-hook 'rus-set-buffer-encoding-hook 'rus-syntax-table)

(provide 'rus-syntax)

rus-encodings.el

;;;; Various encodings of russian letters.
;;;; Each encoding definition is a sequence of codes(numbers) of
;;;; small letters in alphabet order and then capital letters
;;;; in alphabet order.
(require 'cl)                           ; for `some' and `reduce'

(defconst rus-encoding-alt
  (concat
   "\240\241\242\243\244\245\361\246\247"
   "\250\251\252\253\254\255\256\257"
   "\340\341\342\343\344\345\346\347"
   "\350\351\352\353\354\355\356\357"
   "\200\201\202\203\204\205\360\206\207"
   "\210\211\212\213\214\215\216\217"
   "\220\221\222\223\224\225\226\227"
   "\230\231\232\233\234\235\236\237")
  "Codes of russian letters in the \"alt\" encoding.")

(defconst rus-encoding-8koi
  (concat
   "\301\302\327\307\304\305\243\326\332"
   "\311\312\313\314\315\316\317\320"
   "\322\323\324\325\306\310\303\336"
   "\333\335\337\331\330\334\300\321"
   "\341\342\367\347\344\345\263\366\372"
   "\351\352\353\354\355\356\357\360"
   "\362\363\364\365\346\350\343\376"
   "\373\375\377\371\370\374\340\361")
  "Codes of russian letters in the \"koi8\" encoding.")

(defconst rus-encoding-cp1251
  (concat
   "\340\341\342\343\344\345\270\346\347"
   "\350\351\352\353\354\355\356\357"
   "\360\361\362\363\364\365\366\367"
   "\370\371\372\373\374\375\376\377"
   "\300\301\302\303\304\305\250\306\307"
   "\310\311\312\313\314\315\316\317"
   "\320\321\322\323\324\325\326\327"
   "\330\331\332\333\334\335\336\337")
  "Codes of russian letters in the \"cp1251\" encoding.")

;; The two definitions below consist of 7-bit characters and are not
;; listed in `rus-encodings-alist'.
(defconst rus-encoding-jcuken
  (concat
   "f,dult/;pbqrkvyjghcnea[wxio]sm'.z"
   "F<DULT?:PBQRKVYJGHCNEA{WXIO}SM\">Z"))

(defconst rus-encoding-ascii
  (concat
   "abwgde^vzijklmnoprstufhc=[]#yx\\`q"
   "ABWGDE&VZIJKLMNOPRSTUFHC+{}$YX|~Q"))

;;; Alist mapping encoding names to their definitions.
(defvar rus-encodings-alist
  '(("koi8" . rus-encoding-8koi)
    ("cp1251" . rus-encoding-cp1251)
    ("alt" . rus-encoding-alt))
  "Alist mapping encoding names to their definitions.")

(defun rus-encoding (name)
  "Return the definition of the encoding NAME of russian letters.
NAME is looked up in `rus-encodings-alist'; the result is nil when
NAME is not listed there."
  (eval (cdr (assoc name rus-encodings-alist))))

(defvar rus-buffer-encoding nil
  "Encoding of russian chars in the current buffer.")
(make-variable-buffer-local 'rus-buffer-encoding)

(defvar rus-set-buffer-encoding-hook nil
  "List of functions to call after encoding of russian letters
in the current buffer is set.")

(defun rus-set-buffer-encoding (&optional encoding)
  "Set RUS-BUFFER-ENCODING for the current buffer.
With a non-nil ENCODING, use it as is.  Otherwise read the encoding
name from the minibuffer, offering the guess of
`rus-guess-buffer-encoding' (or \"koi8\" when there is no guess) as
the default that is taken on empty input.
Run `rus-set-buffer-encoding-hook' afterwards."
  (interactive)
  (if encoding
      (setq rus-buffer-encoding encoding)
    (let ((e (or (rus-guess-buffer-encoding) "koi8")))
      (setq rus-buffer-encoding
            (completing-read
             (concat "Buffer encoding (default " e "): ")
             rus-encodings-alist nil t))
      ;; Empty input selects the default.
      (if (equal rus-buffer-encoding "")
          (setq rus-buffer-encoding e))))
  (run-hooks 'rus-set-buffer-encoding-hook))

(defun rus-auto-set-buffer-encoding (&optional ask)
  "Set the buffer encoding to the guessed one without asking.
With a non-nil ASK (interactively, a prefix argument), or when no
encoding can be guessed, `rus-set-buffer-encoding' asks for it."
  (interactive "P")
  (rus-set-buffer-encoding
   (and (not ask) (rus-guess-buffer-encoding))))

(defconst frequent_pairs
  '((18 . 19) (19 . 15) (17 .  0) (19 .  5) (16 . 15)
    ( 5 . 19) ( 2 .  0) (15 .  2) (17 . 15) (15 . 12)
    ( 9 . 18) (14 . 15) (11 . 15) ( 5 . 17) (16 . 17)
    (14 .  0) (14 .  9) ( 5 . 14) (19 . 29) ( 0 . 19))
  "Pairs of russian letters that occur in russian texts most frequently.
Each number is the position of a letter in an encoding definition.")

(defconst recognition-level 10
  "How many pairs of russian letters from FREQUENT_PAIRS (in %)
must be in a text to recognize the text as russian (in corresponding
encoding).")

(defvar max-length-of-text-to-analyze 5000
  "How many letters RUS-GUESS-BUFFER-ENCODING should analyze.")

(defun rus-guess-buffer-encoding ()
  "Analyze current buffer and if it contains russian text return the
name of the text encoding.  Return nil when no encoding listed in
`rus-encodings-alist' is recognized.

Only the first `max-length-of-text-to-analyze' characters of the
accessible portion of the buffer are examined.  Adjacent pairs of
characters with codes 128..255 are counted, and an encoding is
recognized when the pairs from FREQUENT_PAIRS, taken in that encoding,
make up more than `recognition-level' percent of the number of such
characters.  Encodings are tried in the order of `rus-encodings-alist'."
  (let ((i 0)
        c
        (prev -1)
        (freqs (make-vector 128 nil))
        (count 0)
        (lim (min (point-max)
                  (+ (point-min) max-length-of-text-to-analyze))))
    ;; Make empty table.
    (while (< i 128)
      (aset freqs i (make-vector 128 0))
      (setq i (1+ i)))
    ;; Scan current buffer, calculate frequencies of char pairs
    ;; and store them to the table.
    (setq i (point-min))
    (while (< i lim)
      (setq c (- (char-after i) 128))
      (if (and (>= c 0) (<= c 127))
          (progn
            (setq count (1+ count))
            (if (and (>= prev 0) (<= prev 127))
                (aset (aref freqs prev) c
                      (1+ (aref (aref freqs prev) c))))))
      (setq prev c)
      (setq i (1+ i)))
    ;; Detect encoding.
    (some (function
           (lambda (ename)
             (let* ((e (rus-encoding ename))
                    (sum (reduce
                          (function
                           (lambda (s p)
                             (let ((row (- (aref e (car p)) 128))
                                   (col (- (aref e (cdr p)) 128)))
                               ;; The table only covers codes
                               ;; 128..255; a pair of an encoding
                               ;; with other codes can not have been
                               ;; counted, so it contributes nothing.
                               (if (and (>= row 0) (<= row 127)
                                        (>= col 0) (<= col 127))
                                   (+ s (aref (aref freqs row) col))
                                 s))))
                          frequent_pairs
                          :initial-value 0)))
               ;; SUM > 0 implies COUNT > 0, so the division is safe.
               (if (and (> sum 0)
                        (> (/ (* sum 100) count) recognition-level))
                   ename
                 nil))))
          (mapcar 'car rus-encodings-alist))))

(provide 'rus-encodings)

-- Best regards, -- Boris.
Some people are only alive because it is illegal to kill them.
--- Gnus v5.5/XEmacs 20.3 - "London"
* Origin: Linux inside (2:5020/510@fidonet)

<< Предыдущая ИНДЕКС Поиск в статьях src Установить закладку Перейти на закладку Следующая >>

 Добавить комментарий
Имя:
E-Mail:
Заголовок:
Текст:




Партнёры:
PostgresPro
Inferno Solutions
Hosting by Hoster.ru
Хостинг:

Закладки на сайте
Проследить за страницей
Created 1996-2025 by Maxim Chirkov
Добавить, Поддержать, Вебмастеру