From f6e9d17f9bde89d4ff2528f62f69f1d7944a5025 Mon Sep 17 00:00:00 2001 From: Jacques Distler Date: Thu, 22 Jul 2010 14:08:21 -0500 Subject: [PATCH] Non-Latin WikiWords Fix bug in WikiWord processing reported by Alexander Hambug. Also fix Cyrillic Capital/Lowercase regexps. --- lib/wiki_content.rb | 2 +- lib/wiki_words.rb | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/wiki_content.rb b/lib/wiki_content.rb index a5dc473d..95e04271 100644 --- a/lib/wiki_content.rb +++ b/lib/wiki_content.rb @@ -181,7 +181,7 @@ class WikiContent < ActiveSupport::SafeBuffer @options[:engine].apply_to(copy) copy.inside_chunks(@options[:hide_chunks]) do |id| - @chunks_by_id[id.to_i].revert + @chunks_by_id[id.to_i].revert if @chunks_by_id[id.to_i] end end diff --git a/lib/wiki_words.rb b/lib/wiki_words.rb index bc3e1824..3839c381 100644 --- a/lib/wiki_words.rb +++ b/lib/wiki_words.rb @@ -8,16 +8,17 @@ module WikiWords I18N_HIGHER_CASE_LETTERS = "ÀÁÂÃÄÅĀĄĂÆÇĆČĈĊĎĐÈÉÊËĒĘĚĔĖĜĞĠĢĤĦÌÍÎÏĪĨĬĮİIJĴĶŁĽĹĻĿÑŃŇŅŊÒÓÔÕÖØŌŐŎŒŔŘŖŚŠŞŜȘŤŢŦȚÙÚÛÜŪŮŰŬŨŲŴŶŸȲÝŹŽŻ" + "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ" + - "ΆΈΉΊΌΎΏѠѢѤѦѨѪѬѮѰѲѴѶѸѺѼѾҀҊҌҎҐҒҔҖҘҚҜҞҠҢҤҦҨҪҬҮҰҲҴҶҸҺҼҾӁӃӅӇӉӋӍӏӐӒӔӖӘӚӜӞӠӢӤӦӨӪӬӮӰӲӴӸЖ" + + "ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯѠѢѤѦѨѪѬѮѰѲѴѶѸѺѼѾҀҊҌҎҐҒҔҖҘҚҜҞҠҢҤҦҨҪҬҮҰҲҴҶҸҺҼҾӀӁӃӅӇӉӋӍӐӒӔӖӘӚӜӞӠӢӤӦӨӪӬӮӰӲӴӶӸӺӼӾԀԂԄԆԈԊԌԎԐԒԔԖԘԚԜԞԠԢ" + "ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖ" I18N_LOWER_CASE_LETTERS = "àáâãäåāąăæçćĉċčďđèéêëēęěĕėƒĝğġģĥħìíîïīĩĭįıijĵķĸłľĺļŀñńňņʼnŋòóôõöøōŏőœŕřŗśŝšşșťţŧțùúûüūůűŭũųŵýÿŷžżźÞþßſð" + "άέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώΐ" + - "абвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљћќѝўџѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҌҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӀӂӄӆӈӊӌӎӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӹ" + - "աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև" + "абвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎӏӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹӻӽӿԁԃԅԇԉԋԍԏԑԓԕԗԙԛԝԟԡԣ" + "աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև" - WIKI_WORD_PATTERN = '[A-Z' + I18N_HIGHER_CASE_LETTERS + ']+[a-z' + 
I18N_LOWER_CASE_LETTERS + ']+[A-Z' + I18N_HIGHER_CASE_LETTERS + ']\w+' + WIKI_WORD_PATTERN = '[A-Z' + I18N_HIGHER_CASE_LETTERS + ']+[a-z' + I18N_LOWER_CASE_LETTERS + ']+[A-Z' + I18N_HIGHER_CASE_LETTERS + + '][A-Za-z0-9_' + I18N_HIGHER_CASE_LETTERS + I18N_LOWER_CASE_LETTERS + ']+' CAMEL_CASED_WORD_BORDER = /([a-z#{I18N_LOWER_CASE_LETTERS}])([A-Z#{I18N_HIGHER_CASE_LETTERS}])/u def self.separate(wiki_word)