// ==UserScript==
// @name Save Vietnamese xenforo forum content to HTML file
// @description Save your favorite thread into a html file
// @namespace Save thread to file
// @icon 
// @match https://voz.vn/t/*
// @match https://forum.gocmod.com/threads/*
// @match https://www.otofun.net/threads/*
// @match https://vn-z.vn/threads/*
// @match https://*xamvn*.*/threads/*
// @match https://*xamvn*.*/r/*
// @match https://*rphang*.*/t/*
// @match https://*thiendia*.*/threads/*
// @grant GM_xmlhttpRequest
// @version 1.2
// @author kylyte
// @license GPL-3.0
// ==/UserScript==
// Configuration options for the thread saver. Time values are in milliseconds.
const config = {
// Pause inserted after each batch of concurrent page fetches.
waitTime: 100,
// When true, images are downloaded and embedded in the saved file as data URLs.
// Set to false if you don't want to save images.
saveWithImages: true,
// How many pages to fetch simultaneously within a chunk (lower values = more stable but slower).
concurrentRequests: 15,
// Number of pages per chunk; a one-second pause is taken between chunks.
chunkSize: 15,
// When true, log() prints additional diagnostic information to the console.
showDebugInfo: true,
// 10000 KiB expressed in bytes.
// NOTE(review): not read by any code visible in this file - confirm whether it is still needed.
maxChunkSizeForHtml: 10000 * 1024
};
// Returns a promise that settles (with no value) once `ms` milliseconds have elapsed.
const sleep = (ms) => new Promise((done) => {
  setTimeout(done, ms);
});
// Debug logger: forwards its arguments to console.log with a "[Voz Saver]" prefix,
// but only while config.showDebugInfo is truthy.
const log = (...args) => {
  return config.showDebugInfo && console.log('[Voz Saver]', ...args);
};
/**
 * Compute the SHA-1 digest of a string.
 *
 * @param {string} message - Text to hash (encoded with TextEncoder before digesting).
 * @returns {Promise<string>} The digest as a lowercase hexadecimal string.
 */
async function createHash(message) {
  const encoded = new TextEncoder().encode(message);
  const digest = await window.crypto.subtle.digest("SHA-1", encoded);
  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    // Two hex characters per byte, zero-padded.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
/**
 * Promise wrapper around GM_xmlhttpRequest.
 *
 * @param {string} url - Absolute URL to request (parsed with `new URL`, so a relative URL throws).
 * @param {string|object} [detail] - Either a response type name
 *   ("blob", "text", "json", "arraybuffer", "document") or an object of extra
 *   request options merged over the defaults.
 * @returns {Promise<*|false>} The response body when the status is exactly 200,
 *   otherwise `false` (also on error or timeout). The promise never rejects.
 */
function xhr(url, detail = {}) {
  const request = { url, origin: new URL(url).origin };
  if (typeof detail === 'string') {
    // Strings other than the known response types are ignored.
    if (/^(?:blob|text|json|arraybuffer|document)$/.test(detail)) {
      request.responseType = detail;
    }
  } else if (typeof detail === 'object') {
    Object.assign(request, detail);
  }
  return new Promise((resolve) => {
    const fail = () => resolve(false);
    request.onloadend = (res) => {
      resolve(res.status === 200 ? res.response : false);
    };
    request.onerror = fail;
    request.ontimeout = fail;
    GM_xmlhttpRequest(request);
  });
}
/**
 * Encode data as a base64 `data:` URL using FileReader.
 *
 * A Blob input is read as-is so its MIME type is kept in the resulting URL;
 * re-wrapping it in `new Blob([data])` would produce a Blob with an empty type.
 * Any other input is wrapped in a new, untyped Blob first.
 *
 * @param {Blob|string|ArrayBuffer|ArrayBufferView} data - Content to encode.
 * @returns {Promise<string>} The data URL; rejects with the reader's error on failure.
 */
function toDataURL(data) {
  return new Promise((resolve, reject) => {
    const source = data instanceof Blob ? data : new Blob([data]);
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(source);
  });
}
/**
 * Gzip-compress arbitrary data.
 *
 * @param {*} data - Anything accepted as a Blob part (string, Blob, buffer, ...).
 * @returns {Promise<Blob>} A Blob holding the gzip-compressed bytes.
 */
async function compressData(data) {
  const gzip = new CompressionStream("gzip");
  const source = new Blob([data]).stream();
  // Response is used only as a convenient way to drain the stream into a Blob.
  const drained = new Response(source.pipeThrough(gzip));
  return drained.blob();
}
/**
 * Collect all CSS used by a document into one string.
 *
 * Linked stylesheets are downloaded one after another via xhr(); sheets with no
 * href, sheets that fail to download, and sheets whose processing throws are
 * skipped (the latter are reported through log()). The text of inline <style>
 * elements is appended afterwards.
 *
 * @param {Document} doc - Document to read <link rel="stylesheet"> and <style> from.
 * @returns {Promise<string>} All collected CSS joined with newlines.
 */
async function extractStyles(doc) {
  const collected = [];
  for (const link of doc.querySelectorAll('link[rel="stylesheet"]')) {
    try {
      const href = link.href;
      if (!href) continue;
      const css = await xhr(href, 'text');
      if (css) collected.push(css);
    } catch (err) {
      log('Failed to process stylesheet:', link.href, err);
    }
  }
  for (const styleEl of doc.querySelectorAll('style')) {
    collected.push(styleEl.textContent);
  }
  return collected.join('\n');
}
/**
 * Make a site-relative URL absolute so it still resolves from a saved file.
 *
 * Protocol-relative values ("//host/path") only get the current protocol;
 * root-relative values ("/path") get the current origin. Anything else is
 * returned unchanged.
 *
 * @param {string} value - Raw attribute value.
 * @returns {string} The rewritten value.
 */
function toAbsoluteUrl(value) {
  // Must be checked first: "//host/x" also starts with "/", and prefixing the
  // origin would yield "https://site//host/x".
  if (value.startsWith('//')) return location.protocol + value;
  if (value.startsWith('/')) return location.origin + value;
  return value;
}

/**
 * Download the images of a parsed page into the shared `images` cache and tag
 * each <img> with an `image-data` attribute holding its cache key.
 *
 * Images are fetched in batches of five with a 100 ms pause between batches.
 * An image that cannot be fetched is left untagged; failures are only logged.
 *
 * @param {Document} doc - Parsed page whose <img> elements are processed.
 * @param {Object<string, string>} images - Cache of data URLs keyed by the SHA-1 of the image URL; mutated.
 * @returns {Promise<void>}
 */
async function embedImages(doc, images) {
  const pending = [];
  for (const img of doc.querySelectorAll('img')) {
    if (!img.src || img.src.startsWith('data:image')) continue;
    pending.push((async () => {
      try {
        const key = await createHash(img.src);
        if (!images[key]) {
          const imgBlob = await xhr(img.src, 'blob');
          if (!imgBlob) return;
          images[key] = await toDataURL(imgBlob);
        }
        img.setAttribute('image-data', key);
      } catch (err) {
        log('Failed to process image:', img.src, err);
      }
    })());
    if (pending.length >= 5) {
      await Promise.all(pending);
      pending.length = 0;
      await sleep(100);
    }
  }
  if (pending.length > 0) {
    await Promise.all(pending);
  }
}

/**
 * Turn the raw HTML of one thread page into compressed, self-contained pieces.
 *
 * The page is parsed, forms, non-message blocks, sidebar siblings and the
 * footer are removed, relative URLs are made absolute, spoilers and collapsed
 * quotes are expanded, and (when config.saveWithImages is set) images are
 * embedded through the shared `images` cache. The `div.p-body-main` element is
 * then gzip-compressed into a data URL. For the first page the rest of the
 * document, with a `{ThreadBody_PLACEHOLDER}` marker in place of the body and
 * all stylesheets inlined, is compressed as the wrapper template.
 *
 * @param {string} htmlStr - HTML source of the page.
 * @param {boolean} [isFirstPage=false] - Whether to also produce the wrapper template and inline CSS.
 * @param {Object<string, string>} [images={}] - Shared image cache; mutated.
 * @param {?string} [cssStyles=null] - Previously extracted CSS to reuse instead of downloading again.
 * @returns {Promise<{wrapperUrl: string, threadBodyUrl: string, extractedCss: ?string}>}
 *   `wrapperUrl` is '' unless `isFirstPage`; `extractedCss` is non-null only when
 *   CSS was extracted by this call.
 * @throws {Error} If the page has no `div.p-body-main` element.
 */
async function processContent(htmlStr, isFirstPage = false, images = {}, cssStyles = null) {
  const html = new DOMParser().parseFromString(htmlStr, 'text/html');

  // Drop everything that is not part of the message list.
  html.querySelector('.blockMessage--none')?.remove();
  html.querySelectorAll('form').forEach(el => el.remove());
  html.querySelectorAll('div.block').forEach(el => {
    if (!el.matches('.block--messages')) el.remove();
  });
  html.querySelectorAll('div.p-body-main.p-body-main--withSidebar>*').forEach(el => {
    if (!el.matches('.p-body-content')) el.remove();
  });
  html.querySelector('footer.p-footer')?.remove();

  // Make site-relative URLs absolute.
  html.querySelectorAll('[href]').forEach(el => {
    const href = el.getAttribute('href');
    if (href) el.setAttribute('href', toAbsoluteUrl(href));
  });
  html.querySelectorAll('[src]').forEach(el => {
    let src = el.getAttribute('src');
    if (!src) return;
    // A data: placeholder is replaced by the real URL from data-src when present;
    // the substituted value is then made absolute as well.
    if (src.startsWith('data:image')) src = el.getAttribute('data-src') || src;
    el.setAttribute('src', toAbsoluteUrl(src));
  });
  html.querySelectorAll('[srcset]').forEach(el => {
    const srcset = el.getAttribute('srcset');
    if (srcset) {
      const rewritten = srcset.split(',').map(entry => toAbsoluteUrl(entry.trim())).join(', ');
      el.setAttribute('srcset', rewritten);
    }
  });

  // Expand collapsed quotes and spoilers, and neutralise page-navigation links.
  html.querySelectorAll('div.bbCodeBlock-content>div.bbCodeBlock-expandContent.js-expandContent').forEach(el => el.className = '');
  html.querySelectorAll('.bbCodeSpoiler-button,.bbCodeSpoiler-content').forEach(el => el.classList.add('is-active'));
  html.querySelectorAll('div.pageNav a').forEach(el => el.removeAttribute('href'));

  if (config.saveWithImages) {
    await embedImages(html, images);
  }

  // Only the first page carries the stylesheet: download it unless the caller
  // already has it, then replace the <link> elements with one inline <style>.
  let extractedCss = null;
  if (isFirstPage) {
    let css = cssStyles;
    if (!css) {
      extractedCss = await extractStyles(html);
      css = extractedCss;
    }
    html.querySelectorAll('link[rel="stylesheet"]').forEach(el => el.remove());
    const styleEl = html.createElement('style');
    styleEl.textContent = css;
    html.head.appendChild(styleEl);
  }

  const threadBody = html.querySelector('div.p-body-main');
  if (!threadBody) {
    throw new Error('Could not find thread body content');
  }
  const compressedBody = await compressData(threadBody.outerHTML);
  const threadBodyUrl = await toDataURL(compressedBody);

  let wrapperUrl = '';
  if (isFirstPage) {
    // The body has already been captured above; swap it for the marker that the
    // saved file later replaces with the body of whichever page is shown.
    threadBody.outerHTML = `{ThreadBody_PLACEHOLDER}`;
    const serialized = new XMLSerializer().serializeToString(html);
    const compressed = await compressData(serialized);
    wrapperUrl = await toDataURL(compressed);
  }
  return { wrapperUrl, threadBodyUrl, extractedCss };
}
/**
 * Interactively download a range of pages of the current thread and hand them
 * to generateHtmlFile().
 *
 * The user is prompted for a page range (the prompt text is Vietnamese: a range
 * such as "1-50", a single page such as "5", or blank for every page). Pages
 * are fetched in chunks of config.chunkSize, config.concurrentRequests at a
 * time, while a progress panel with a Cancel button is shown. The panel is
 * always removed when the function finishes, including on errors.
 *
 * @param {string} threadId - Numeric thread id taken from the URL.
 * @param {string} threadInPath - Path segment that precedes the id ("t", "r" or "threads").
 * @returns {Promise<?boolean>} `true` once the file was generated, `false` when
 *   the first page (which provides the page template) could not be saved, and
 *   `null` when the prompt or the export was cancelled or an export is already running.
 */
async function saveThread(threadId, threadInPath) {
  // The panel's element ids are looked up by id below, so allow only one export at a time.
  if (document.getElementById('voz_saver_progress')) return null;

  const maxPageEl = document.querySelector("ul.pageNav-main>li:last-of-type>a");
  const parsedMaxPage = maxPageEl ? Number.parseInt(maxPageEl.textContent, 10) : 1;
  // An unparsable label would make every range check and loop below a no-op.
  const maxPage = Number.isNaN(parsedMaxPage) ? 1 : parsedMaxPage;

  const pageRange = prompt(
    "Nhập thông số để tải xuống. Ví dụ:\n" +
    "Nhập 1-50 sẽ tải từ trang 1 tới trang 50\n" +
    "Nhập 5 sẽ tải chỉ trang 5\n" +
    "Bỏ trống sẽ tải tất cả các trang"
  );
  // null means the dialog was dismissed; only an empty answer means "all pages".
  if (pageRange === null) return null;

  let startPage = 1;
  let endPage = maxPage;
  const rangeText = pageRange.trim();
  if (rangeText) {
    if (rangeText.includes('-')) {
      const [start, end] = rangeText.split('-').map(p => Number.parseInt(p.trim(), 10));
      // start <= end rejects reversed ranges, which would otherwise export zero pages.
      if (!Number.isNaN(start) && !Number.isNaN(end) && start >= 1 && start <= end && end <= maxPage) {
        startPage = start;
        endPage = end;
      } else {
        alert(`Invalid range. Using full range (1-${maxPage}).`);
      }
    } else {
      const page = Number.parseInt(rangeText, 10);
      if (!Number.isNaN(page) && page >= 1 && page <= maxPage) {
        startPage = page;
        endPage = page;
      } else {
        alert(`Invalid page number. Using full range (1-${maxPage}).`);
      }
    }
  }
  const totalPages = endPage - startPage + 1;

  document.body.insertAdjacentHTML("beforeend",
`<div id="voz_saver_progress" style="position:fixed; bottom:0; left:0; right:0; background:rgba(0,0,0,0.8); color:white; padding:10px; z-index:9999; display:flex; flex-direction:column;">
<div style="display:flex; justify-content:space-between; margin-bottom:5px;">
<span>Saving thread (0/${totalPages} pages)</span>
<button id="voz_saver_cancel" style="background:#ff4444; border:none; color:white; padding:2px 8px; cursor:pointer;">Cancel</button>
</div>
<progress id="voz_saver_progress_bar" value="0" max="${totalPages}" style="width:100%; height:20px;"></progress>
<div id="voz_saver_status" style="margin-top:5px; font-size:12px;">Initializing...</div>
</div>`
  );

  try {
    const progressBar = document.getElementById('voz_saver_progress_bar');
    const progressText = document.querySelector('#voz_saver_progress span');
    const statusText = document.getElementById('voz_saver_status');
    const cancelButton = document.getElementById('voz_saver_cancel');
    let cancelled = false;
    cancelButton.addEventListener('click', () => {
      cancelled = true;
      statusText.textContent = 'Cancelling...';
    });

    const images = {};
    // pages[0] holds the wrapper template; pages[n] holds the body of page n.
    const pages = [];
    let pageCount = 0;
    let cssStyles = null;
    const updateProgress = () => {
      progressBar.value = pageCount;
      progressText.textContent = `Saving thread (${pageCount}/${totalPages} pages)`;
    };

    // Fetch and process one page, retrying up to `retries` more times with a
    // one-second pause. Resolves to true on success and false otherwise.
    async function fetchPage(pageNo, retries = 3) {
      if (cancelled) return false;
      try {
        statusText.textContent = `Fetching page ${pageNo}...`;
        const pageUrl = `${location.origin}/${threadInPath}/${threadId}/page-${pageNo}`;
        const response = await fetch(pageUrl);
        if (response.status !== 200) {
          if (retries > 0) {
            statusText.textContent = `Error fetching page ${pageNo}, retrying (${retries} left)...`;
            await sleep(1000);
            return fetchPage(pageNo, retries - 1);
          }
          throw new Error(`Failed to fetch page ${pageNo} (status: ${response.status})`);
        }
        const html = await response.text();
        statusText.textContent = `Processing page ${pageNo}...`;
        const { wrapperUrl, threadBodyUrl, extractedCss } = await processContent(html, pageNo === startPage, images, cssStyles);
        if (pageNo === startPage) {
          if (extractedCss) cssStyles = extractedCss;
          pages[0] = wrapperUrl;
        }
        pages[pageNo] = threadBodyUrl;
        pageCount++;
        updateProgress();
        return true;
      } catch (err) {
        if (retries > 0) {
          statusText.textContent = `Error processing page ${pageNo}, retrying (${retries} left)...`;
          await sleep(1000);
          return fetchPage(pageNo, retries - 1);
        }
        log(`Error processing page ${pageNo}:`, err);
        statusText.textContent = `Failed to process page ${pageNo}: ${err.message}`;
        return false;
      }
    }

    const chunks = [];
    for (let i = startPage; i <= endPage; i += config.chunkSize) {
      chunks.push(Array.from({ length: Math.min(config.chunkSize, endPage - i + 1) }, (_, j) => i + j));
    }
    for (let i = 0; i < chunks.length; i++) {
      if (cancelled) break;
      statusText.textContent = `Processing chunk ${i+1}/${chunks.length}...`;
      for (let j = 0; j < chunks[i].length; j += config.concurrentRequests) {
        if (cancelled) break;
        const batch = chunks[i].slice(j, j + config.concurrentRequests);
        await Promise.all(batch.map(pageNo => fetchPage(pageNo)));
        await sleep(config.waitTime);
      }
      if (i < chunks.length - 1) {
        statusText.textContent = `Chunk ${i+1} complete. Taking a short break...`;
        await sleep(1000);
      }
    }
    if (cancelled) {
      return null;
    }
    // Without the wrapper from the first page the generated file cannot render anything.
    if (!pages[0]) {
      alert(`Could not save page ${startPage}, so no file was created. Please try again.`);
      return false;
    }

    statusText.textContent = 'Building final HTML file...';
    const title = document.querySelector('title')?.textContent
      .split('-').pop()?.split('|')[0].trim() || 'vozThread';
    const fileName = `${title}_${new Date().toISOString().slice(0, 10)}.html`;
    await generateHtmlFile(fileName, threadId, pages, images, cssStyles, maxPage, startPage, endPage);
    return true;
  } finally {
    // Never leave the progress panel behind, whatever happened above.
    document.getElementById('voz_saver_progress')?.remove();
  }
}
/**
 * Assemble the standalone HTML viewer file and trigger its download.
 *
 * The file embeds the compressed wrapper (`pages[0]`), the compressed body of
 * every saved page, the image data URLs, the thread CSS, and a small script
 * that decompresses and displays one page at a time.
 *
 * The viewer inserts the page body with a function replacer, because a string
 * replacement passed to String.prototype.replace has `$&`, `$'`, `$1` and
 * similar sequences in it expanded.
 *
 * @param {string} fileName - Download name; also used (HTML-escaped) as the document title.
 * @param {string} threadId - Thread id, embedded as a constant in the viewer script.
 * @param {string[]} pages - Data URLs: index 0 is the wrapper, index n the body of page n.
 * @param {Object<string, string>} images - Image data URLs keyed by hash.
 * @param {?string} cssStyles - Thread CSS to inline, if any.
 * @param {number} maxPage - Last page of the thread, embedded as a constant.
 * @param {number} startPage - First saved page.
 * @param {number} endPage - Last saved page.
 * @returns {Promise<void>}
 */
async function generateHtmlFile(fileName, threadId, pages, images, cssStyles, maxPage, startPage, endPage) {
  const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (text) => String(text).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);
  // A literal "</style" inside the CSS would end the <style> element early;
  // "<\/style" is an equivalent CSS escape that the HTML parser does not match.
  const safeCss = (cssStyles || '').replace(/<\/style/gi, '<\\/style');

  const blobParts = [];
  blobParts.push(`
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${escapeHtml(fileName)}</title>
<style>
/* Custom navigation controls */
body, html { margin: 0; padding: 0; }
#loading {
position: fixed; top: 0; left: 0; right: 0; bottom: 0;
background: rgba(0,0,0,0.8); color: white; display: flex;
flex-direction: column; justify-content: center; align-items: center;
z-index: 9999;
}
.navigation-controls {
position: fixed;
bottom: 20px;
right: 20px;
background: #f0f0f0;
padding: 10px;
border-radius: 5px;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
z-index: 1000;
display: flex;
gap: 10px;
}
.nav-button {
background: #4a86e8;
color: white;
border: none;
padding: 5px 10px;
border-radius: 3px;
cursor: pointer;
}
.nav-button:hover {
background: #2a66c8;
}
.nav-button:disabled {
background: #ccc;
cursor: not-allowed;
}
/* Thread CSS */
${safeCss}
</style>
</head>
<body>
<div id="loading">
<h2>Loading Thread...</h2>
<progress id="loading-progress" value="0" max="100" style="width:80%; max-width:400px"></progress>
<div id="loading-text">Initializing...</div>
</div>
<div id="screen"></div>
<div class="navigation-controls">
<button id="prev-page-btn" class="nav-button" disabled>Previous</button>
<span id="page-display">Page ${startPage}</span>
<button id="next-page-btn" class="nav-button">Next</button>
<button id="goto-page-btn" class="nav-button">Go to Page</button>
</div>
<script>
// Thread data
const threadId = "${threadId}";
const threadBodyReplacement = "{ThreadBody_PLACEHOLDER}";
const maxPage = ${maxPage};
const startPage = ${startPage};
const endPage = ${endPage};
let currentPage = ${startPage};
const pages = {};
const images = {};
`);
  blobParts.push(`
pages[0] = "${pages[0]}";
`);
  for (let i = startPage; i <= endPage; i++) {
    // Pages that failed to download are simply absent from the file.
    if (pages[i]) {
      const pageChunk = `
// Add page ${i}
pages[${i}] = "${pages[i]}";
`;
      blobParts.push(pageChunk);
      if (i % 50 === 0) {
        await sleep(0);
      }
    }
  }

  const imageKeys = Object.keys(images);
  for (let i = 0; i < imageKeys.length; i += 10) {
    const chunkIndex = i / 10;
    let imageChunk = `
// Images chunk ${chunkIndex + 1}
`;
    for (const key of imageKeys.slice(i, i + 10)) {
      imageChunk += ` images["${key}"] = "${images[key]}";\n`;
    }
    blobParts.push(imageChunk);
    // Yield to the event loop after every tenth chunk (100 images) so the page stays responsive.
    if (chunkIndex % 10 === 9) {
      await sleep(0);
    }
  }

  blobParts.push(`
async function decompressData(data) {
const response = await fetch(data);
const blob = await response.blob();
const decompressedStream = blob.stream().pipeThrough(new DecompressionStream("gzip"));
return await new Response(decompressedStream).blob();
}
async function blobToText(blob) {
return await new Promise((resolve) => {
const reader = new FileReader();
reader.onload = () => resolve(reader.result);
reader.readAsText(blob);
});
}
function updateNavButtons() {
prevBtn.disabled = currentPage <= startPage;
nextBtn.disabled = currentPage >= endPage;
pageDisplay.textContent = \`Page \${currentPage}\`;
}
const screen = document.getElementById('screen');
const loading = document.getElementById('loading');
const loadingProgress = document.getElementById('loading-progress');
const loadingText = document.getElementById('loading-text');
const prevBtn = document.getElementById('prev-page-btn');
const nextBtn = document.getElementById('next-page-btn');
const gotoBtn = document.getElementById('goto-page-btn');
const pageDisplay = document.getElementById('page-display');
prevBtn.addEventListener('click', () => {
if (currentPage > startPage) {
showPage(currentPage - 1);
}
});
nextBtn.addEventListener('click', () => {
if (currentPage < endPage) {
showPage(currentPage + 1);
}
});
gotoBtn.addEventListener('click', () => {
const pageNo = prompt(\`Enter page number (\${startPage}-\${endPage})\`, currentPage);
if (pageNo && !isNaN(pageNo)) {
const page = parseInt(pageNo);
if (page >= startPage && page <= endPage) {
showPage(page);
} else {
alert(\`Please enter a number between \${startPage} and \${endPage}\`);
}
}
});
let threadWrapper;
async function showPage(pageId = startPage) {
try {
loading.style.display = 'flex';
loadingText.textContent = \`Loading page \${pageId}...\`;
const decompressedBody = await decompressData(pages[pageId]);
const threadBody = await blobToText(decompressedBody);
const pageContent = threadWrapper.replace(threadBodyReplacement, () => threadBody);
screen.innerHTML = pageContent;
const imgs = screen.querySelectorAll('img[image-data]');
if (imgs.length > 0) {
loadingProgress.max = imgs.length;
loadingProgress.value = 0;
let loadedImages = 0;
for (const img of imgs) {
const key = img.getAttribute('image-data');
if (key && images[key]) {
img.src = images[key];
}
loadedImages++;
loadingProgress.value = loadedImages;
loadingText.textContent = \`Loading images (\${loadedImages}/\${imgs.length})...\`;
if (loadedImages % 10 === 0) {
await new Promise(r => setTimeout(r, 0));
}
}
}
setupPageNavigation();
currentPage = pageId;
updateNavButtons();
window.scrollTo(0, 0);
loading.style.display = 'none';
} catch (error) {
console.error('Error showing page:', error);
loadingText.textContent = \`Error loading page \${pageId}: \${error.message}\`;
}
}
function setupPageNavigation() {
screen.querySelectorAll('ul.pageNav-main a:not([id])').forEach(el => {
el.addEventListener('click', e => {
e.preventDefault();
const pageNum = parseInt(e.target.textContent.trim());
if (!isNaN(pageNum) && pageNum >= startPage && pageNum <= endPage) {
showPage(pageNum);
}
});
});
screen.querySelectorAll('ul.pageNav-main a[title="Go to page"]').forEach(el => {
el.addEventListener('click', e => {
e.preventDefault();
const pageNo = prompt(\`Enter page number (\${startPage}-\${endPage})\`, currentPage);
if (pageNo && !isNaN(pageNo)) {
const page = parseInt(pageNo);
if (page >= startPage && page <= endPage) {
showPage(page);
}
}
});
});
screen.querySelectorAll('.pageNav-jump.pageNav-jump--next').forEach(el => {
el.addEventListener('click', e => {
e.preventDefault();
if (currentPage < endPage) {
showPage(currentPage + 1);
}
});
});
screen.querySelectorAll('.pageNav-jump.pageNav-jump--prev').forEach(el => {
el.addEventListener('click', e => {
e.preventDefault();
if (currentPage > startPage) {
showPage(currentPage - 1);
}
});
});
}
(async function init() {
try {
loadingText.textContent = 'Preparing thread template...';
const decompressedWrapper = await decompressData(pages[0]);
threadWrapper = await blobToText(decompressedWrapper);
await showPage(startPage);
} catch (error) {
console.error('Initialization error:', error);
loadingText.textContent = \`Error initializing: \${error.message}\`;
}
})();
</script>
</body>
</html>
`);

  // Hand the assembled file to the browser through a temporary, hidden link.
  const blob = new Blob(blobParts, { type: 'text/html' });
  const downloadLink = document.createElement('a');
  downloadLink.href = URL.createObjectURL(blob);
  downloadLink.download = fileName;
  downloadLink.style.display = 'none';
  document.body.appendChild(downloadLink);
  downloadLink.click();
  document.body.removeChild(downloadLink);
  setTimeout(() => URL.revokeObjectURL(downloadLink.href), 1000);
}
/**
 * Entry point: read the thread id from the current URL and add a
 * "Lưu Thread" (save thread) button after each `ul.listInline` found inside a
 * `.p-description` element. Does nothing when the URL does not look like a thread URL.
 */
(async function main() {
  const domain = window.location.hostname;
  const threadName = ['t', 'r', 'threads'];
  const threadRegx = threadName.join('|');
  // Captures: 1 = path segment (t|r|threads), 2 = numeric thread id, 3 = optional page number.
  const reg = new RegExp(`https:\/\/(?:.*\\.)?${domain.replaceAll('.', '\\.')}\/(${threadRegx})\/(?:[^\/]+\\.)?(\\d+)\/?(?:page-(\\d+))?`);
  log(reg);
  const match = location.href.match(reg);
  if (!match) {
    // Without this guard, indexing the null match would throw and reject the IIFE's promise.
    log('Current URL is not a recognised thread URL:', location.href);
    return;
  }
  const [, threadInPath, threadId] = match;

  const createSaveButton = () => {
    const btn = document.createElement("a");
    btn.classList.add("pageNav-jump", "pageNav-jump--next");
    btn.textContent = "Lưu Thread";
    btn.style.cursor = "pointer";
    btn.style.marginTop = "5px";
    btn.addEventListener("click", async () => {
      await saveThread(threadId, threadInPath);
    });
    return btn;
  };

  document.querySelectorAll(".p-description").forEach(desc => {
    const ul = desc.querySelector("ul.listInline");
    if (ul) {
      ul.after(createSaveButton());
    }
  });
})();