added word count views

2008-06-02 09:36:48 -07:00 · 2008-06-02 09:36:48 -07:00 · e4e533e533
commit e4e533e533
parent b36552e710
16 changed files with 12 additions and 604 deletions
--- a/views/books/chunked-map.js
+++ b/views/books/chunked-map.js
@ -0,0 +1,2 @@
+
+function(doc){/* Emits [doc.title,doc.chunk] with a null value when both fields are truthy. */if(doc.title&&doc.chunk){emit([doc.title,doc.chunk],null);}}
--- a/views/books/united-map.js
+++ b/views/books/united-map.js
@ -0,0 +1,2 @@
+
+function(doc){/* Emits [doc.title,doc.chunk] with a null value when doc.text matches /united/. */var text=doc.text;if(!text||!text.match(/united/)){return;}emit([doc.title,doc.chunk],null);}
--- a/views/markov/chain-map.js
+++ b/views/markov/chain-map.js
@ -0,0 +1,2 @@
+
+function(doc){/* Emits every run of up to four consecutive lowercased words of doc.text as the key, with doc.title as the value. */if(typeof doc.text!=="string")return;var words=doc.text.split(/\W/).filter(function(w){return w.length>0}).map(function(w){return w.toLowerCase()});for(var i=0,l=words.length;i<l;i++){/* slice takes (start,end), so the window must end at i+4 rather than the constant 4 */emit(words.slice(i,i+4),doc.title);}}
--- a/views/markov/chain-reduce.js
+++ b/views/markov/chain-reduce.js
@ -0,0 +1,2 @@
+
+function(key,vs,c){/* Returns sum(vs) when c is truthy, otherwise the number of entries in vs. */return c?sum(vs):vs.length;}
--- a/views/mentions/inbound-map.js
+++ b/views/mentions/inbound-map.js
@ -1,2 +0,0 @@
-
-function(doc){if(doc.mp3s){for(var i=0,m;m=doc.mp3s[i];i++){emit(m.href,doc.fetch.url);}}}
--- a/views/mentions/inbound-reduce.js
+++ b/views/mentions/inbound-reduce.js
@ -1,3 +0,0 @@
-
-function(hrefs,ss){log(ss)
-return ss[0];}
--- a/views/mentions/mp3links-map.js
+++ b/views/mentions/mp3links-map.js
@ -1,2 +0,0 @@
-
-function(doc){var fetchurl=doc.fetch&&doc.fetch.url;if(!fetchurl)return;doc.entries&&doc.entries.forEach(function(e){e.mp3s&&e.mp3s.forEach(function(mp3){mp3.href&&emit(mp3.href,fetchurl);});});doc.playlist&&doc.playlist.track&&doc.playlist.track.forEach(function(t){t.location&&t.location.forEach(function(url){emit(url,fetchurl);});});doc.mp3s&&doc.mp3s.forEach(function(mp3){mp3.href&&emit(mp3.href,fetchurl);});}
--- a/views/mentions/mp3links-reduce.js
+++ b/views/mentions/mp3links-reduce.js
@ -1,2 +0,0 @@
-
-function(ks,vs){log({keys:ks});log({values:vs});return 1;};
--- a/views/metadata/albums-map.js
+++ b/views/metadata/albums-map.js
@ -1,2 +0,0 @@
-
-function(doc){doc.playlist&&doc.playlist.track&&doc.playlist.track.forEach(function(t){emit([t.creator||null,t.title||null],t.album||null);});};
--- a/views/metadata/track-map.js
+++ b/views/metadata/track-map.js
@ -1,2 +0,0 @@
-
-function(doc){doc.playlist&&doc.playlist.track&&doc.playlist.track.forEach(function(t){if(t.creator||t.title){if(t.location){t.location.forEach(function(url){emit([t.creator||null,t.title||null],url);});}else{emit([t.creator||null,t.title||null],null);}}});};
--- a/views/metadata/track-reduce.js
+++ b/views/metadata/track-reduce.js
@ -1,2 +0,0 @@
-
-function(ks,vs,c){if(c){return null;}else{log(ks[0][0][0]);return ks[0][0][0];}};
--- a/views/test/maponly-map.js
+++ b/views/test/maponly-map.js
@ -1,2 +0,0 @@
-
-function(doc){emit(null,doc);}
--- a/views/test/reducehaving-map.js
+++ b/views/test/reducehaving-map.js
@ -1,585 +0,0 @@
-/*
-
- JS Beautifier
---------------
-  $Date: 2008-05-26 06:34:52 +0300 (Mon, 26 May 2008) $
-  $Revision: 55 $
-
-
-  Written by Einars "elfz" Lielmanis, <elfz@laacz.lv> 
-      http://elfz.laacz.lv/beautify/
-
-  Originally converted to javascript by Vital, <vital76@gmail.com> 
-      http://my.opera.com/Vital/blog/2007/11/21/javascript-beautify-on-javascript-translated
-
-
-  You are free to use this in any way you want, in case you find this useful or working for you.
-
-  Usage:
-    js_beautify(js_source_text);
-
-*/
-
-
-function js_beautify(js_source_text, indent_size, indent_character)
-{
-
-    var input, output, token_text, last_type, last_text, last_word, current_mode, modes, indent_level, indent_string;
-    var whitespace, wordchar, punct, parser_pos, line_starters, in_case;
-    var prefix, token_type, do_block_just_closed, var_line, var_line_tainted;
-
-
-    function trim_output()
-    {
-        while (output.length && (output[output.length - 1] === ' ' || output[output.length - 1] === indent_string)) {
-            output.pop();
-        }
-    }
-
-    function print_newline(ignore_repeated)
-    {
-        ignore_repeated = typeof ignore_repeated === 'undefined' ? true: ignore_repeated;
-        
-        trim_output();
-
-        if (!output.length) {
-            return; // no newline on start of file
-        }
-
-        if (output[output.length - 1] !== "\n" || !ignore_repeated) {
-            output.push("\n");
-        }
-        for (var i = 0; i < indent_level; i++) {
-            output.push(indent_string);
-        }
-    }
-
-
-
-    function print_space()
-    {
-        var last_output = output.length ? output[output.length - 1] : ' ';
-        if (last_output !== ' ' && last_output !== '\n' && last_output !== indent_string) { // prevent occassional duplicate space
-            output.push(' ');
-        }
-    }
-
-
-    function print_token()
-    {
-        output.push(token_text);
-    }
-
-    function indent()
-    {
-        indent_level++;
-    }
-
-
-    function unindent()
-    {
-        if (indent_level) {
-            indent_level--;
-        }
-    }
-
-
-    function remove_indent()
-    {
-        if (output.length && output[output.length - 1] === indent_string) {
-            output.pop();
-        }
-    }
-
-
-    function set_mode(mode)
-    {
-        modes.push(current_mode);
-        current_mode = mode;
-    }
-
-
-    function restore_mode()
-    {
-        do_block_just_closed = current_mode === 'DO_BLOCK';
-        current_mode = modes.pop();
-    }
-
-
-    function in_array(what, arr)
-    {
-        for (var i = 0; i < arr.length; i++)
-        {
-            if (arr[i] === what) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-
-
-    function get_next_token()
-    {
-        var n_newlines = 0;
-        var c = '';
-
-        do {
-            if (parser_pos >= input.length) {
-                return ['', 'TK_EOF'];
-            }
-            c = input.charAt(parser_pos);
-
-            parser_pos += 1;
-            if (c === "\n") {
-                n_newlines += 1;
-            }
-        }
-        while (in_array(c, whitespace));
-
-        if (n_newlines > 1) {
-            for (var i = 0; i < 2; i++) {
-                print_newline(i === 0);
-            }
-        }
-        var wanted_newline = (n_newlines === 1);
-
-
-        if (in_array(c, wordchar)) {
-            if (parser_pos < input.length) {
-                while (in_array(input.charAt(parser_pos), wordchar)) {
-                    c += input.charAt(parser_pos);
-                    parser_pos += 1;
-                    if (parser_pos === input.length) {
-                        break;
-                    }
-                }
-            }
-
-            // small and surprisingly unugly hack for 1E-10 representation
-            if (parser_pos !== input.length && c.match(/^[0-9]+[Ee]$/) && input.charAt(parser_pos) === '-') {
-                parser_pos += 1;
-
-                var t = get_next_token(parser_pos);
-                c += '-' + t[0];
-                return [c, 'TK_WORD'];
-            }
-
-            if (c === 'in') { // hack for 'in' operator
-                return [c, 'TK_OPERATOR'];
-            }
-            return [c, 'TK_WORD'];
-        }
-        
-        if (c === '(' || c === '[') {
-            return [c, 'TK_START_EXPR'];
-        }
-
-        if (c === ')' || c === ']') {
-            return [c, 'TK_END_EXPR'];
-        }
-
-        if (c === '{') {
-            return [c, 'TK_START_BLOCK'];
-        }
-
-        if (c === '}') {
-            return [c, 'TK_END_BLOCK'];
-        }
-
-        if (c === ';') {
-            return [c, 'TK_END_COMMAND'];
-        }
-
-        if (c === '/') {
-            var comment = '';
-            // peek for comment /* ... */
-            if (input.charAt(parser_pos) === '*') {
-                parser_pos += 1;
-                if (parser_pos < input.length) {
-                    while (! (input.charAt(parser_pos) === '*' && input.charAt(parser_pos + 1) && input.charAt(parser_pos + 1) === '/') && parser_pos < input.length) {
-                        comment += input.charAt(parser_pos);
-                        parser_pos += 1;
-                        if (parser_pos >= input.length) {
-                            break;
-                        }
-                    }
-                }
-                parser_pos += 2;
-                return ['/*' + comment + '*/', 'TK_BLOCK_COMMENT'];
-            }
-            // peek for comment // ...
-            if (input.charAt(parser_pos) === '/') {
-                comment = c;
-                while (input.charAt(parser_pos) !== "\x0d" && input.charAt(parser_pos) !== "\x0a") {
-                    comment += input.charAt(parser_pos);
-                    parser_pos += 1;
-                    if (parser_pos >= input.length) {
-                        break;
-                    }
-                }
-                parser_pos += 1;
-                if (wanted_newline) {
-                    print_newline();
-                }
-                return [comment, 'TK_COMMENT'];
-            }
-
-        }
-
-        if (c === "'" || // string
-        c === '"' || // string
-        (c === '/' &&
-        ((last_type === 'TK_WORD' && last_text === 'return') || (last_type === 'TK_START_EXPR' || last_type === 'TK_END_BLOCK' || last_type === 'TK_OPERATOR' || last_type === 'TK_EOF' || last_type === 'TK_END_COMMAND')))) { // regexp
-            var sep = c;
-            var esc = false;
-            c = '';
-
-            if (parser_pos < input.length) {
-
-                while (esc || input.charAt(parser_pos) !== sep) {
-                    c += input.charAt(parser_pos);
-                    if (!esc) {
-                        esc = input.charAt(parser_pos) === '\\';
-                    } else {
-                        esc = false;
-                    }
-                    parser_pos += 1;
-                    if (parser_pos >= input.length) {
-                        break;
-                    }
-                }
-
-            }
-
-            parser_pos += 1;
-            if (last_type === 'TK_END_COMMAND') {
-                print_newline();
-            }
-            return [sep + c + sep, 'TK_STRING'];
-        }
-
-        if (in_array(c, punct)) {
-            while (parser_pos < input.length && in_array(c + input.charAt(parser_pos), punct)) {
-                c += input.charAt(parser_pos);
-                parser_pos += 1;
-                if (parser_pos >= input.length) {
-                    break;
-                }
-            }
-            return [c, 'TK_OPERATOR'];
-        }
-
-        return [c, 'TK_UNKNOWN'];
-    }
-
-
-    //----------------------------------
-
-    indent_character = indent_character || ' ';
-    indent_size = indent_size || 4;
-
-    indent_string = '';
-    while (indent_size--) {
-        indent_string += indent_character;
-    }
-
-    input = js_source_text;
-
-    last_word = ''; // last 'TK_WORD' passed
-    last_type = 'TK_START_EXPR'; // last token type
-    last_text = ''; // last token text
-    output = [];
-
-    do_block_just_closed = false;
-    var_line = false;
-    var_line_tainted = false;
-
-    whitespace = "\n\r\t ".split('');
-    wordchar = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$'.split('');
-    punct = '+ - * / % & ++ -- = += -= *= /= %= == === != !== > < >= <= >> << >>> >>>= >>= <<= && &= | || ! !! , : ? ^ ^= |='.split(' ');
-
-    // words which should always start on new line.
-    line_starters = 'continue,try,throw,return,var,if,switch,case,default,for,while,break,function'.split(',');
-
-    // states showing if we are currently in expression (i.e. "if" case) - 'EXPRESSION', or in usual block (like, procedure), 'BLOCK'.
-    // some formatting depends on that.
-    current_mode = 'BLOCK';
-    modes = [current_mode];
-
-    indent_level = 0;
-    parser_pos = 0; // parser position
-    in_case = false; // flag for parser that case/default has been processed, and next colon needs special attention
-    while (true) {
-        var t = get_next_token(parser_pos);
-        token_text = t[0];
-        token_type = t[1];
-        if (token_type === 'TK_EOF') {
-            break;
-        }
-
-        switch (token_type) {
-
-        case 'TK_START_EXPR':
-            var_line = false;
-            set_mode('EXPRESSION');
-            if (last_type === 'TK_END_EXPR' || last_type === 'TK_START_EXPR') {
-                // do nothing on (( and )( and ][ and ]( ..
-            } else if (last_type !== 'TK_WORD' && last_type !== 'TK_OPERATOR') {
-                print_space();
-            } else if (in_array(last_word, line_starters) && last_word !== 'function') {
-                print_space();
-            }
-            print_token();
-            break;
-
-        case 'TK_END_EXPR':
-            print_token();
-            restore_mode();
-            break;
-
-        case 'TK_START_BLOCK':
-            
-            if (last_word === 'do') {
-                set_mode('DO_BLOCK');
-            } else {
-                set_mode('BLOCK');
-            }
-            if (last_type !== 'TK_OPERATOR' && last_type !== 'TK_START_EXPR') {
-                if (last_type === 'TK_START_BLOCK') {
-                    print_newline();
-                } else {
-                    print_space();
-                }
-            }
-            print_token();
-            indent();
-            break;
-
-        case 'TK_END_BLOCK':
-            if (last_type === 'TK_START_BLOCK') {
-                // nothing
-                trim_output();
-                unindent();
-            } else {
-                unindent();
-                print_newline();
-            }
-            print_token();
-            restore_mode();
-            break;
-
-        case 'TK_WORD':
-
-            if (do_block_just_closed) {
-                print_space();
-                print_token();
-                print_space();
-                break;
-            }
-
-            if (token_text === 'case' || token_text === 'default') {
-                if (last_text === ':') {
-                    // switch cases following one another
-                    remove_indent();
-                } else {
-                    // case statement starts in the same line where switch
-                    unindent();
-                    print_newline();
-                    indent();
-                }
-                print_token();
-                in_case = true;
-                break;
-            }
-
-
-            prefix = 'NONE';
-            if (last_type === 'TK_END_BLOCK') {
-                if (!in_array(token_text.toLowerCase(), ['else', 'catch', 'finally'])) {
-                    prefix = 'NEWLINE';
-                } else {
-                    prefix = 'SPACE';
-                    print_space();
-                }
-            } else if (last_type === 'TK_END_COMMAND' && (current_mode === 'BLOCK' || current_mode === 'DO_BLOCK')) {
-                prefix = 'NEWLINE';
-            } else if (last_type === 'TK_END_COMMAND' && current_mode === 'EXPRESSION') {
-                prefix = 'SPACE';
-            } else if (last_type === 'TK_WORD') {
-                prefix = 'SPACE';
-            } else if (last_type === 'TK_START_BLOCK') {
-                prefix = 'NEWLINE';
-            } else if (last_type === 'TK_END_EXPR') {
-                print_space();
-                prefix = 'NEWLINE';
-            }
-
-            if (last_type !== 'TK_END_BLOCK' && in_array(token_text.toLowerCase(), ['else', 'catch', 'finally'])) {
-                print_newline();
-            } else if (in_array(token_text, line_starters) || prefix === 'NEWLINE') {
-
-                if (last_text === 'else') {
-                    // no need to force newline on else break
-                    print_space();
-                } else if ((last_type === 'TK_START_EXPR' || last_text === '=') && token_text === 'function') {
-                    // no need to force newline on 'function': (function
-                    // DONOTHING
-                } else if (last_type === 'TK_WORD' && (last_text === 'return' || last_text === 'throw')) {
-                    // no newline between 'return nnn'
-                    print_space();
-                } else if (last_type !== 'TK_END_EXPR') {
-                    if ((last_type !== 'TK_START_EXPR' || token_text !== 'var') && last_text !== ':') {
-                        // no need to force newline on 'var': for (var x = 0...)
-                        if (token_text === 'if' && last_type === 'TK_WORD' && last_word === 'else') {
-                            // no newline for } else if {
-                            print_space();
-                        } else {
-                            print_newline();
-                        }
-                    }
-                }
-            } else if (prefix === 'SPACE') {
-                print_space();
-            }
-            print_token();
-            last_word = token_text;
-
-            if (token_text === 'var') {
-                var_line = true;
-                var_line_tainted = false;
-            }
-
-            break;
-
-        case 'TK_END_COMMAND':
-
-            print_token();
-            var_line = false;
-            break;
-
-        case 'TK_STRING':
-
-            if (last_type === 'TK_START_BLOCK' || last_type === 'TK_END_BLOCK') {
-                print_newline();
-            } else if (last_type === 'TK_WORD') {
-                print_space();
-            }
-            print_token();
-            break;
-
-        case 'TK_OPERATOR':
-
-            var start_delim = true;
-            var end_delim = true;
-            if (var_line && token_text !== ',') {
-                var_line_tainted = true;
-                if (token_text === ':') {
-                    var_line = false;
-                }
-            }
-
-            if (token_text === ':' && in_case) {
-                print_token(); // colon really asks for separate treatment
-                print_newline();
-                break;
-            }
-
-            in_case = false;
-
-            if (token_text === ',') {
-                if (var_line) {
-                    if (var_line_tainted) {
-                        print_token();
-                        print_newline();
-                        var_line_tainted = false;
-                    } else {
-                        print_token();
-                        print_space();
-                    }
-                } else if (last_type === 'TK_END_BLOCK') {
-                    print_token();
-                    print_newline();
-                } else {
-                    if (current_mode === 'BLOCK') {
-                        print_token();
-                        print_newline();
-                    } else {
-                        // EXPR od DO_BLOCK
-                        print_token();
-                        print_space();
-                    }
-                }
-                break;
-            } else if (token_text === '--' || token_text === '++') { // unary operators special case
-                if (last_text === ';') {
-                    // space for (;; ++i)
-                    start_delim = true;
-                    end_delim = false;
-                } else {
-                    start_delim = false;
-                    end_delim = false;
-                }
-            } else if (token_text === '!' && last_type === 'TK_START_EXPR') {
-                // special case handling: if (!a)
-                start_delim = false;
-                end_delim = false;
-            } else if (last_type === 'TK_OPERATOR') {
-                start_delim = false;
-                end_delim = false;
-            } else if (last_type === 'TK_END_EXPR') {
-                start_delim = true;
-                end_delim = true;
-            } else if (token_text === '.') {
-                // decimal digits or object.property
-                start_delim = false;
-                end_delim = false;
-
-            } else if (token_text === ':') {
-                // zz: xx
-                // can't differentiate ternary op, so for now it's a ? b: c; without space before colon
-                if (last_text.match(/^\d+$/)) {
-                    // a little help for ternary a ? 1 : 0;
-                    start_delim = true;
-                } else {
-                    start_delim = false;
-                }
-            }
-            if (start_delim) {
-                print_space();
-            }
-
-            print_token();
-
-            if (end_delim) {
-                print_space();
-            }
-            break;
-
-        case 'TK_BLOCK_COMMENT':
-
-            print_newline();
-            print_token();
-            print_newline();
-            break;
-
-        case 'TK_COMMENT':
-
-            // print_newline();
-            print_space();
-            print_token();
-            print_newline();
-            break;
-
-        case 'TK_UNKNOWN':
-            print_token();
-            break;
-        }
-
-        last_type = token_type;
-        last_text = token_text;
-    }
-
-    return output.join('');
-
-}
--- a/views/test/reducehaving-reduce.js
+++ b/views/test/reducehaving-reduce.js
@ -1,2 +0,0 @@
-
-function(ks,vs){return vs.length;}
--- a/views/word_count/count-map.js
+++ b/views/word_count/count-map.js
@ -0,0 +1,2 @@
+
+function(doc){/* Emits [word,doc.title] with value 1 for every non-empty lowercased word of doc.text; documents without a string text are skipped instead of throwing. */if(typeof doc.text!=="string")return;doc.text.split(/\W/).forEach(function(word){if(word.length>0)emit([word.toLowerCase(),doc.title],1);});}
--- a/views/word_count/count-reduce.js
+++ b/views/word_count/count-reduce.js
@ -0,0 +1,2 @@
+
+function(key,combine){/* Returns sum() of the second argument. */var total=sum(combine);return total;}
				`@ -0,0 +1,2 @@`

				`function(doc){doc.title&&doc.chunk&&emit([doc.title,doc.chunk],null);}`
				`@ -0,0 +1,2 @@`

				`function(doc){if(doc.text&&doc.text.match(/united/))emit([doc.title,doc.chunk],null)}`
				`@ -0,0 +1,2 @@`

				`function(doc){/* Emits every run of up to four consecutive lowercased words of doc.text as the key, with doc.title as the value. */if(typeof doc.text!=="string")return;var words=doc.text.split(/\W/).filter(function(w){return w.length>0}).map(function(w){return w.toLowerCase()});for(var i=0,l=words.length;i<l;i++){/* slice takes (start,end), so the window must end at i+4 rather than the constant 4 */emit(words.slice(i,i+4),doc.title);}}`
				`@ -0,0 +1,2 @@`

				`function(key,vs,c){if(c){return sum(vs);}else{return vs.length;}}`
				`@ -1,2 +0,0 @@`

				`function(doc){if(doc.mp3s){for(var i=0,m;m=doc.mp3s[i];i++){emit(m.href,doc.fetch.url);}}}`
				`@ -1,2 +0,0 @@`

				`function(doc){var fetchurl=doc.fetch&&doc.fetch.url;if(!fetchurl)return;doc.entries&&doc.entries.forEach(function(e){e.mp3s&&e.mp3s.forEach(function(mp3){mp3.href&&emit(mp3.href,fetchurl);});});doc.playlist&&doc.playlist.track&&doc.playlist.track.forEach(function(t){t.location&&t.location.forEach(function(url){emit(url,fetchurl);});});doc.mp3s&&doc.mp3s.forEach(function(mp3){mp3.href&&emit(mp3.href,fetchurl);});}`
				`@ -1,2 +0,0 @@`

				`function(ks,vs){log({keys:ks});log({values:vs});return 1;};`
				`@ -1,2 +0,0 @@`

				`function(doc){doc.playlist&&doc.playlist.track&&doc.playlist.track.forEach(function(t){emit([t.creator\|\|null,t.title\|\|null],t.album\|\|null);});};`
				`@ -1,2 +0,0 @@`

				`function(ks,vs,c){if(c){return null;}else{log(ks[0][0][0]);return ks[0][0][0];}};`
				`@ -0,0 +1,2 @@`

				`function(key,combine){return sum(combine);}`