Skip to content

Commit

Permalink
Add -m, --match <globs...> option
Browse files Browse the repository at this point in the history
  • Loading branch information
zerodevx committed Feb 11, 2022
1 parent 08d5b34 commit 75a4d84
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 22 deletions.
20 changes: 14 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ CLI to generate XML sitemaps for static sites from local filesystem
Options:
-b, --base <url> base URL (required)
-r, --root <dir> root working directory (default: ".")
-m, --match <glob...> globs to match (default: ["**/*.html"])
-i, --ignore <glob...> globs to ignore (default: ["404.html"])
-c, --changefreq <glob,changefreq...> comma-separated glob-changefreq pairs
-p, --priority <glob,priority...> comma-separated glob-priority pairs
Expand All @@ -59,7 +60,7 @@ Options:

#### HTML parsing

By default, all matched files are piped through a fast
By default, all matched `.html` files are piped through a fast
[HTML parser](https://github.com/fb55/htmlparser2) to detect if the `noindex`
[meta tag](https://developers.google.com/search/docs/advanced/crawling/block-indexing#meta-tag) is
set - typically in the form of `<meta name="robots" content="noindex" />` - in which case that file
Expand Down Expand Up @@ -99,13 +100,13 @@ Disabled by default; pass option `--slash` to enable.
[always added](https://github.com/zerodevx/static-sitemap-cli/tree/v1#to-slash-or-not-to-slash) to
root domains.

#### Ignore some files
#### Match or ignore files

The `-i` flag allows multiple entries. By default, it's set to the `["404.html"]`. Change the glob
ignore patterns to suit your use-case like so:
The `-m` and `-i` flags allow multiple entries. By default, they are set to `["**/*.html"]` and
`["404.html"]` respectively. Change the glob patterns to suit your use case like so:

```
$ sscli ... -i '404.html' '**/ignore/**' 'this/other/specific/file.html'
$ sscli ... -m '**/*.{html,jpg,png}' -i '404.html' 'ignore/**' 'this/other/specific/file.html'
```

#### Glob-[*] pairs
Expand Down Expand Up @@ -143,7 +144,13 @@ $ sscli -b https://x.com -r dist -f xml -o > www/sm.xml
#### Get subset of a directory

```
$ sscli -b https://x.com/foo -r dist/foo -f txt -o > dist/sitemap.txt
$ sscli -b https://x.com/foo -r dist/foo -f xml -o > dist/sitemap.xml
```

#### Generate TXT sitemap for image assets

```
$ sscli -b https://x.com -r dist -m '**/*.{jpg,jpeg,gif,png,bmp,webp,svg}' -f txt
```

## Programmatic Use
Expand All @@ -160,6 +167,7 @@ import {
const options = {
base: 'https://x.com',
root: 'path/to/root',
match: ['**/*.html'],
ignore: ['404.html'],
changefreq: [],
priority: [],
Expand Down
7 changes: 2 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "static-sitemap-cli",
"version": "2.0.1",
"version": "2.1.0",
"description": "CLI to generate XML sitemaps for static sites from local filesystem",
"author": "Jason Lee <[email protected]>",
"type": "module",
Expand Down Expand Up @@ -38,10 +38,7 @@
],
"license": "ISC",
"homepage": "https://npmjs.com/package/static-sitemap-cli",
"repository": {
"type": "git",
"url": "https://github.com/zerodevx/static-sitemap-cli.git"
},
"repository": "github:zerodevx/static-sitemap-cli",
"keywords": [
"sscli",
"sitemap",
Expand Down
1 change: 1 addition & 0 deletions src/cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ program
.description('CLI to generate XML sitemaps for static sites from local filesystem')
.option('-b, --base <url>', 'base URL (required)')
.option('-r, --root <dir>', 'root working directory', '.')
.option('-m, --match <glob...>', 'globs to match', ['**/*.html'])
.option('-i, --ignore <glob...>', 'globs to ignore', ['404.html'])
.option('-c, --changefreq <glob,changefreq...>', 'comma-separated glob-changefreq pairs')
.option('-p, --priority <glob,priority...>', 'comma-separated glob-priority pairs')
Expand Down
24 changes: 13 additions & 11 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ function log(msg) {
console.warn('\x1b[36m%s\x1b[0m', `[sscli] ${msg}`)
}

async function getFiles({ root, ignore, verbose }) {
const files = await fastglob('**/*.html', { cwd: root, stats: true, ignore })
async function getFiles({ root, match, ignore, verbose }) {
const files = await fastglob(match, { cwd: root, stats: true, ignore })
if (!files.length) {
throw new Error('NO_MATCHES')
}
Expand Down Expand Up @@ -43,16 +43,18 @@ function detectNoindex(path) {
}

async function transformUrl(
file,
{ path, stats: { mtime } },
{ root, base, changefreq, priority, robots, clean, slash, verbose }
) {
if (robots) {
if (await detectNoindex(nodepath.join(root, file.path))) {
if (verbose) log(`noindex: ${file.path}`)
return
}
if (
robots &&
nodepath.extname(path) === '.html' &&
(await detectNoindex(nodepath.join(root, path)))
) {
if (verbose) log(`noindex: ${path}`)
return
}
let url = base + file.path.split(nodepath.sep).join('/')
let url = base + path.split(nodepath.sep).join('/')
if (clean) {
if (url.slice(-11) === '/index.html') url = url.slice(0, -11)
else if (url.slice(-5) === '.html') url = url.slice(0, -5)
Expand All @@ -61,12 +63,12 @@ async function transformUrl(
const check = (pairs, tagname) => {
for (let a = pairs.length - 1; a >= 0; a--) {
const p = pairs[a].split(',')
if (micromatch.isMatch(file.path, p[0])) return { [tagname]: p[1] }
if (micromatch.isMatch(path, p[0])) return { [tagname]: p[1] }
}
}
return {
loc: url,
lastmod: file.stats.mtime.toISOString(),
lastmod: mtime.toISOString(),
...(changefreq && changefreq.length && check(changefreq, 'changefreq')),
...(priority && priority.length && check(priority, 'priority'))
}
Expand Down

0 comments on commit 75a4d84

Please sign in to comment.