本帖最后由 flashercs 于 2018-12-4 07:50 编辑
回复 27# lxh623
现在的版本已经包含作者信息了，需要重新下载。
0<1/*,:
@echo off
rem Hybrid batch/JScript launcher. Save this file as zgshige.bat.
rem cmd.exe executes the lines down to "exit /b"; the JScript engine sees them as one block comment.
rem The script path is passed quoted and fully qualified so that folders with spaces
rem and invocations without the .bat extension still work.
CScript.exe -nologo -e:jscript "%~f0"
pause
exit /b
*/;
// Everything below this line is JScript, run by CScript.exe through the launcher above.
/**
 * Creates an XMLHTTP automation object.
 *
 * Tries every ProgID listed in the global aXMLHttpVers array, in order, and
 * returns the first object that can be instantiated. If none can be created,
 * the failure is reported through showError and the script is ended with
 * exit code 1.
 *
 * @returns {Object} the first XMLHTTP object that could be created
 */
function makeXHR() {
    for (var i = 0; i < aXMLHttpVers.length; i++) {
        try {
            return new ActiveXObject(aXMLHttpVers[i]);
        } catch (error) {
            // Creating this ProgID failed; deliberately ignored so the next one is tried.
        }
    }
    showError('Can\'t build XMLHTTP automation object.');
    WScript.Quit(1);
}
/**
 * Creates an in-memory 'htmlfile' document initialised with an empty HTML5 skeleton.
 *
 * Callers assign fetched markup to the document's body.innerHTML and then query it
 * with DOM methods. The skeleton carries an X-UA-Compatible "IE=edge" meta tag,
 * presumably so that the document runs in the newest available document mode.
 *
 * @returns {Object} the opened, written and closed htmlfile document
 */
function makeHtmldoc() {
    var htmldoc = new ActiveXObject('htmlfile');
    htmldoc.open();
    htmldoc.write('<!DOCTYPE html><html><head><meta charset="utf-8" /><meta http-equiv="X-UA-Compatible" content="IE=edge"><title>Page Title</title><meta name="viewport" content="width=device-width, initial-scale=1"></head><body></body></html>');
    htmldoc.close();
    return htmldoc;
}
/**
 * Creates an ADODB.Stream object opened later by byte2str.
 *
 * @returns {Object} a new, still closed ADODB.Stream with Mode set to 3
 */
function makeADOStream() {
    var oADO = new ActiveXObject('ADODB.Stream');
    oADO.Mode = 3; // 3 = adModeReadWrite in the ADO ConnectModeEnum
    return oADO;
}
/**
 * Decodes a byte array into a string by round-tripping it through an ADODB.Stream.
 *
 * The bytes are written in binary mode, the stream is rewound, switched to text
 * mode with the requested charset and read back in full.
 *
 * The stream is closed even when writing or reading throws. The same stream
 * object is reused for every call, and leaving it open would make the next
 * Open() fail.
 *
 * @param {Object} ado closed ADODB.Stream (or compatible) object to use as scratch space
 * @param {*} byteArr binary data, e.g. the responseBody of an XMLHTTP request
 * @param {string} sEncoding charset name understood by ADODB.Stream, e.g. 'utf-8'
 * @returns {string} the decoded text
 */
function byte2str(ado, byteArr, sEncoding) {
    var text;
    ado.Type = 1; // 1 = adTypeBinary
    ado.Open();
    try {
        ado.Write(byteArr);
        ado.Position = 0; // rewind; Type is switched while positioned at the start
        ado.Type = 2; // 2 = adTypeText
        ado.Charset = sEncoding;
        text = ado.ReadText(-1); // -1 = adReadAll
    } finally {
        ado.Close();
    }
    return text;
}
/**
 * Builds one download worker for poem pages.
 *
 * A worker owns its own XMLHTTP object, htmlfile document and ADODB stream.
 * Calling worker.job(method, uri, async) starts a request. When the request
 * completes, the poem section is cut out of the response, its title, author and
 * content are written to tsOut, the URL is appended to tsCache and echoed to
 * StdOut.
 *
 * Whatever happens while handling the response (HTTP error, missing markers,
 * unexpected page structure, decoding failure), the worker is pushed back onto
 * the global aXhr pool. Otherwise the pool would shrink and the dispatcher,
 * which waits for a free worker, could stall. Failures are reported instead of
 * being silently dropped.
 *
 * @returns {{job: function(string, string, boolean)}} the worker object
 */
function xhrWorkers() {
    var url;
    var xhr_ = makeXHR();
    var htmldoc = makeHtmldoc();
    var oADO = makeADOStream();
    // Everything from the "poem body start" marker up to the "poem body end" marker.
    var re = /<!-- 诗歌正文开始 -->[\s\S]+(?=<!-- 诗歌正文结束 -->)/i;
    var worker;

    // Parses the poem fragment and appends it to the output and cache files.
    function savePoem(html) {
        htmldoc.body.innerHTML = html;
        var title = 'TTT ' + htmldoc.body.getElementsByTagName('h3')[0].innerText;
        var author = htmldoc.body.children[1].children[0].children[0].innerText;
        var content = htmldoc.body.getElementsByClassName('m-lg font14')[0].innerText;
        // Every line break (plus the whitespace that follows it) becomes '<br/>' + CRLF.
        tsOut.WriteLine((title + '\r\n' + author + '\r\n' + content + '\r\n').replace(/\r?\n\s*/g, '<br/>\r\n'));
        tsCache.WriteLine(url);
        WScript.StdOut.WriteLine(url);
    }

    xhr_.onReadyStateChange = function () {
        if (xhr_.readyState !== 4) {
            return;
        }
        try {
            if (xhr_.status === 200) {
                var m = byte2str(oADO, xhr_.responseBody, 'utf-8').match(re);
                if (m) {
                    savePoem(m[0]);
                } else {
                    WScript.StdOut.WriteLine(url + ' poem body not found');
                }
            } else {
                WScript.StdOut.WriteLine(url + ' status=' + xhr_.status);
            }
        } catch (e) {
            // The page is skipped (it is not cached, so a later run retries it).
            showError(e, url);
        } finally {
            // Always hand the worker back to the pool.
            aXhr.push(worker);
        }
    };

    worker = {
        job: function (method, uri, async) {
            url = uri;
            xhr_.open(method, uri, async);
            // setRequestHeaders(xhr_);
            xhr_.send();
        }
    };
    return worker;
}
/**
 * Sets a fixed set of browser-like request headers on an XMLHTTP object.
 *
 * Must be called after open() and before send(), as required by
 * setRequestHeader. The headers are applied in the order listed.
 *
 * @param {Object} xhr_ opened XMLHTTP object exposing setRequestHeader(name, value)
 */
function setRequestHeaders(xhr_) {
    var headers = [
        ['Accept', 'text/html, application/xhtml+xml, application/xml; q=0.9, */*; q=0.8'],
        ['Accept-Language', 'en-US, en; q=0.8, zh-Hans-CN; q=0.5, zh-Hans; q=0.3'],
        ['Accept-Encoding', 'gzip, deflate'],
        ['TE', 'gzip, deflate'],
        // ['Connection', 'close'],
        ['Cache-Control', 'no-cache'],
        ['User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134']
    ];
    for (var i = 0; i < headers.length; i++) {
        xhr_.setRequestHeader(headers[i][0], headers[i][1]);
    }
}
/**
 * Resolves a link target found on a page into an absolute URL.
 *
 * - href with a scheme and authority ("http://host/x") is returned unchanged;
 * - protocol-relative href ("//host/x") gets the scheme of the page it was found on;
 * - root-relative href ("/x") is appended to the global domain;
 * - anything else is resolved against the directory of url.
 *
 * Only a syntactically valid scheme at the very start of href is stripped, so
 * a colon later in a relative href (e.g. inside a query string) is left intact.
 *
 * @param {string} href value of the link's href attribute
 * @param {string} url absolute URL of the page that contains the link
 * @returns {string} the resolved URL
 */
function parseURL(href, url) {
    var reScheme = /^[a-z][a-z0-9+.\-]*:/i;
    var $path = href.replace(reScheme, '');
    var baseScheme;
    if (/^\/\//.test($path)) {
        if ($path !== href) {
            // A scheme was present, so href is already absolute.
            return href;
        }
        baseScheme = reScheme.exec(url);
        return (baseScheme ? baseScheme[0] : '') + href;
    }
    if (/^\//.test($path)) {
        return domain + $path;
    }
    // Drop the last path segment of the page URL and append the relative path.
    return url.replace(/[^\/]+$/, '') + $path;
}
/**
 * Writes an error report to standard output.
 *
 * For Error objects a four-line report is printed: the error name, the given
 * source, the error number as unsigned 32-bit hexadecimal and the message.
 * Any other value is printed as is.
 *
 * @param {*} err Error object or plain message
 * @param {string} [source] description of where the error happened; printed empty when omitted
 */
function showError(err, source) {
    var report;
    if ('[object Error]' === Object.prototype.toString.call(err)) {
        report = [
            err.name,
            'source: ' + (undefined === source ? '' : source),
            // >>> 0 turns the signed error number into its unsigned 32-bit form.
            'number: ' + (err.number >>> 0).toString(16),
            'Information: ' + err.message
        ].join('\r\n');
    } else {
        report = err;
    }
    WScript.StdOut.WriteLine(report);
}
// ---- Configuration and shared state ----

// XMLHTTP ProgIDs tried by makeXHR, in this order.
var aXMLHttpVers = ['MSXML2.XMLHTTP.6.0', 'MSXML2.XMLHTTP.3.0', 'MSXML2.XMLHTTP', 'Microsoft.XMLHTTP'];
var fso = new ActiveXObject('Scripting.FileSystemObject');
// Prefix used by parseURL for root-relative links.
var domain = "http://www.zgshige.com";
// Output file that receives the extracted poems.
var outFile = "zgshige.txt";
// One URL per line: pages that were already processed in this or an earlier run.
var cacheFile = "cache_zgshige.txt";
var tsOut;
var tsCache;
// Lookup table: oCache[url] is true for every URL found in the cache file.
var oCache = {};

// Work relative to the folder that contains this script.
new ActiveXObject('WScript.Shell').CurrentDirectory = fso.GetParentFolderName(WScript.ScriptFullName);

// Load the cache (1 = ForReading; the file is created when it does not exist).
try {
    tsCache = fso.OpenTextFile(cacheFile, 1, true);
} catch (e) {
    showError(e, 'can not read cache file ' + cacheFile);
    WScript.Quit(1);
}
while (!tsCache.AtEndOfStream) {
    oCache[tsCache.ReadLine()] = true;
}
tsCache.Close();

// Reopen the cache for appending (8 = ForAppending).
try {
    tsCache = fso.OpenTextFile(cacheFile, 8, true);
} catch (e) {
    showError(e, 'can not write cache file ' + cacheFile);
    WScript.Quit(2);
}

// Open the output file for appending; -1 = TristateTrue, i.e. Unicode encoding.
try {
    tsOut = fso.OpenTextFile(outFile, 8, true, -1);
} catch (e) {
    showError(e, 'can not write file ' + outFile);
    WScript.Quit(3);
}
/**
 * Task source for the dispatcher.
 *
 * Creating oTasks immediately starts an asynchronous crawl of the index pages:
 * each index page is fetched, its "next page" link is followed, and the links to
 * poem pages that are not yet in oCache are queued. After the poem links of an
 * index page, the index page URL itself is queued as well. Index pages already in
 * oCache contribute no tasks but are still followed to the next page.
 *
 * - atEnd    becomes true when there is no further index page, or when fetching
 *            or parsing an index page fails (the reason is written to StdOut);
 * - current  is not used by the visible code;
 * - moveNext returns the next queued URL, or undefined when the queue is empty
 *            at the moment. An empty queue does not mean the crawl is finished;
 *            check atEnd for that.
 */
var oTasks = {
    atEnd: false,
    current: null,
    moveNext: (function () {
        var aIndex = [];
        // Starting index page; replace it (for example with
        // 'http://www.zgshige.com/sg/index.shtml') to start crawling elsewhere.
        var nextPage = 'http://www.zgshige.com/zcms/catalog/15112/pc/index_201.shtml';
        var xhr = makeXHR();
        var htmldoc = makeHtmldoc();
        var oADO = makeADOStream();
        // Everything from the "main body start" marker up to the first "main body end" marker.
        var re = /<!-- 主体开始 -->[\S\s]*?(?=<!-- 主体结束 -->)/i;
        var url = nextPage;

        // Marks the crawl as finished.
        function stopCrawl() {
            nextPage = null;
            oTasks.atEnd = true;
        }

        // Returns the absolute URL behind the first link labelled "next page", or null.
        function findNextPage() {
            var links = htmldoc.documentElement.getElementsByClassName('fc_ch1');
            var label, href;
            for (var k = 0, n = links.length; k < n; ++k) {
                label = links[k].firstChild;
                if (label && label.nodeValue === '下一页') {
                    href = links[k].parentNode.getAttribute('href');
                    return href ? parseURL(href, url) : null;
                }
            }
            return null;
        }

        // Queues the not yet cached poem links of the current page, then the page itself.
        function queuePoemLinks() {
            var links = htmldoc.body.getElementsByClassName('fc-green text-uppercase');
            var href, item;
            for (var k = 0, n = links.length; k < n; ++k) {
                href = links[k].getAttribute('href');
                if (!href) {
                    continue; // element without a link target
                }
                item = parseURL(href, url);
                if (!oCache[item]) {
                    aIndex.push(item);
                }
            }
            aIndex.push(url);
        }

        xhr.onReadyStateChange = function () {
            var m;
            if (xhr.readyState !== 4) {
                return;
            }
            try {
                if (200 !== xhr.status) {
                    WScript.StdOut.WriteLine(url + ' status=' + xhr.status);
                    stopCrawl();
                    return;
                }
                m = byte2str(oADO, xhr.responseBody, 'utf-8').match(re);
                if (!m) {
                    WScript.StdOut.WriteLine(url + ' index body not found');
                    stopCrawl();
                    return;
                }
                htmldoc.body.innerHTML = m[0];
                nextPage = findNextPage();
                if (!oCache[url]) {
                    queuePoemLinks();
                }
                url = nextPage;
                if (url) {
                    xhr.open('GET', url, true);
                    // setRequestHeaders(xhr);
                    xhr.send();
                } else {
                    stopCrawl();
                }
            } catch (e) {
                // Without this the dispatcher would wait for atEnd forever.
                showError(e, url);
                stopCrawl();
            }
        };

        // NOTE(review): only this first request sends the custom headers; the
        // follow-up requests above do not. Kept as in the original; confirm intent.
        xhr.open('GET', url, true);
        setRequestHeaders(xhr);
        xhr.send();

        return function () {
            return aIndex.shift();
        };
    })()
};
// ---- Main: dispatch queued URLs to a fixed pool of download workers ----

var maxThreads = 20, // number of workers (concurrent requests) in the pool
    aXhr = [], // pool of idle workers; a worker puts itself back when its request completes
    i, l, task,
    reIndex = /index.*html$/i; // recognises index page URLs among the tasks

/**
 * Hands every currently queued task to a worker.
 *
 * Index page URLs are not downloaded again; they are only recorded in the cache
 * file and echoed. For poem pages the function waits until a worker is idle.
 * Worker callbacks run while the script is inside WScript.Sleep, which is what
 * refills the pool.
 */
function dispatchPending() {
    while (task = oTasks.moveNext()) {
        if (reIndex.test(task)) {
            tsCache.WriteLine(task);
            WScript.StdOut.WriteLine(task);
            continue;
        }
        while (!aXhr.length) {
            WScript.Sleep(10);
        }
        aXhr.shift().job('GET', task, true);
    }
}

for (i = 0; i < maxThreads; ++i) {
    aXhr.push(xhrWorkers());
}

// Keep dispatching while the index crawl is still producing tasks.
while (!oTasks.atEnd) {
    dispatchPending();
    WScript.Sleep(100);
}
// Tasks queued between the last dispatch and atEnd becoming true.
dispatchPending();

// Wait for the requests that are still in flight before closing the files;
// otherwise their results would be lost when the script quits. The wait is
// bounded so that a worker that never reports back cannot hang the script.
var drainWaitedMs = 0,
    drainTimeoutMs = 60000;
while (aXhr.length < maxThreads && drainWaitedMs < drainTimeoutMs) {
    WScript.Sleep(50);
    drainWaitedMs += 50;
}
if (aXhr.length < maxThreads) {
    WScript.StdOut.WriteLine((maxThreads - aXhr.length) + ' request(s) did not finish in time');
}

tsOut.close();
tsCache.close();
WScript.Quit(0);
复制代码
|