Skip to content

Commit 7b101e8

Browse files
authored
feat(cli): add a subcommand to easily manage git hooks. (#98)
* feat(cli): add a subcommand to easily manage git hooks. * feat(hooks): add default hooks. * add documentation. * add `--force` option to `hooks` * test coverage
1 parent ba55a80 commit 7b101e8

File tree

8 files changed

+791
-0
lines changed

8 files changed

+791
-0
lines changed

docs/cli.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* [Configuring VectorCode](#configuring-vectorcode)
1717
* [Vectorising Your Code](#vectorising-your-code)
1818
* [File Specs](#file-specs)
19+
* [Git Hooks](#git-hooks)
1920
* [Making a Query](#making-a-query)
2021
* [Listing All Collections](#listing-all-collections)
2122
* [Removing a Collection](#removing-a-collection)
@@ -353,6 +354,35 @@ on certain conditions. See
353354
[the wiki](https://github.com/Davidyz/VectorCode/wiki/Tips-and-Tricks#git-hooks)
354355
for an example to use it with git hooks.
355356

357+
#### Git Hooks
358+
359+
To keep the embeddings up-to-date, you may find it useful to set up some git
360+
hooks. The CLI provides a subcommand, `vectorcode hooks`, that helps you manage
361+
hooks when working with a git repository. You can put some custom hooks in
362+
`~/.config/vectorcode/hooks/` and the `vectorcode hooks` command will pick them
363+
up and append them to your existing hooks, or create new hook scripts if they
364+
don't exist yet. The hook files should be named the same as they would be under
365+
the `.git/hooks` directory. For example, a pre-commit hook would be named
366+
`~/.config/vectorcode/hooks/pre-commit`. By default, there are 2 pre-defined
367+
hooks:
368+
```bash
369+
# pre-commit hook that vectorises changed files before you commit.
370+
diff_files=$(git diff --cached --name-only)
371+
[ -z "$diff_files" ] || vectorcode vectorise $diff_files
372+
```
373+
```bash
374+
# post-checkout hook that vectorises changed files when you check out a
375+
# different branch/tag/commit
376+
files=$(git diff --name-only "$1" "$2")
377+
[ -z "$files" ] || vectorcode vectorise $files
378+
```
379+
When you run `vectorcode hooks` in a git repo, these 2 hooks will be added to
380+
your `.git/hooks/`. Hooks that are managed by VectorCode will be wrapped by
381+
`# VECTORCODE_HOOK_START` and `# VECTORCODE_HOOK_END` comment lines. They help
382+
VectorCode determine whether hooks have been added, so don't delete the markers
383+
unless you know what you're doing. To remove the hooks, simply delete the lines
384+
wrapped by these 2 comment strings.
385+
356386
### Making a Query
357387

358388
To retrieve a list of documents from the database, you can use the following command:

src/vectorcode/cli_utils.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class CliAction(Enum):
5959
clean = "clean"
6060
prompts = "prompts"
6161
chunks = "chunks"
62+
hooks = "hooks"
6263

6364

6465
@dataclass
@@ -283,6 +284,16 @@ def get_cli_parser():
283284
)
284285

285286
subparsers.add_parser("drop", parents=[shared_parser], help="Remove a collection.")
287+
hooks_parser = subparsers.add_parser(
288+
"hooks", parents=[shared_parser], help="Inject git hooks."
289+
)
290+
hooks_parser.add_argument(
291+
"--force",
292+
"-f",
293+
action="store_true",
294+
default=False,
295+
help="Override existing VectorCode hooks.",
296+
)
286297

287298
init_parser = subparsers.add_parser(
288299
"init",
@@ -379,6 +390,8 @@ async def parse_cli_args(args: Optional[Sequence[str]] = None):
379390
configs_items["chunk_size"] = main_args.chunk_size
380391
configs_items["overlap_ratio"] = main_args.overlap
381392
configs_items["encoding"] = main_args.encoding
393+
case "hooks":
394+
configs_items["force"] = main_args.force
382395
return Config(**configs_items)
383396

384397

src/vectorcode/main.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ async def async_main():
6262
from vectorcode.subcommands import chunks
6363

6464
return_val = await chunks(final_configs)
65+
case CliAction.hooks:
66+
from vectorcode.subcommands import hooks
67+
68+
return await hooks(cli_args)
6569

6670
from vectorcode.common import start_server, try_server
6771

src/vectorcode/subcommands/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from vectorcode.subcommands.chunks import chunks
33
from vectorcode.subcommands.clean import clean
44
from vectorcode.subcommands.drop import drop
5+
from vectorcode.subcommands.hooks import hooks
56
from vectorcode.subcommands.init import init
67
from vectorcode.subcommands.ls import ls
78
from vectorcode.subcommands.prompt import prompts
@@ -14,6 +15,7 @@
1415
"chunks",
1516
"clean",
1617
"drop",
18+
"hooks",
1719
"init",
1820
"ls",
1921
"prompts",
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import glob
2+
import logging
3+
import os
4+
import platform
5+
import re
6+
import stat
7+
from pathlib import Path
8+
from typing import Optional
9+
10+
from vectorcode.cli_utils import GLOBAL_CONFIG_PATH, Config, find_project_root
11+
12+
logger = logging.getLogger(name=__name__)
13+
__GLOBAL_HOOKS_PATH = Path(GLOBAL_CONFIG_PATH).parent / "hooks"
14+
15+
16+
# Keys: names of the hooks, e.g. `pre-commit`
17+
# Values: lines of the hooks.
18+
__HOOK_CONTENTS: dict[str, list[str]] = {
19+
"pre-commit": [
20+
"diff_files=$(git diff --cached --name-only)",
21+
'[ -z "$diff_files" ] || vectorcode vectorise $diff_files',
22+
],
23+
"post-checkout": [
24+
'files=$(git diff --name-only "$1" "$2")',
25+
'[ -z "$files" ] || vectorcode vectorise $files',
26+
],
27+
}
28+
29+
30+
def __lines_are_empty(lines: list[str]) -> bool:
    """Return True when *lines* holds no content other than whitespace.

    An empty list counts as empty; otherwise every line must consist solely
    of whitespace characters (including the line terminator).
    """
    blank = re.compile(r"\s*")
    # `all()` over an empty iterable is True, which covers the empty-list case.
    return all(blank.fullmatch(text) is not None for text in lines)
35+
36+
37+
def load_hooks():
    """Merge user-defined hooks from the global hooks directory into the hook table.

    Every regular file directly under ``__GLOBAL_HOOKS_PATH`` is read. The
    file's stem is used as the hook name and its lines replace any existing
    entry of the same name in ``__HOOK_CONTENTS``. Files that contain only
    whitespace are ignored, so they never override an existing entry.
    """
    # No `global` statement is needed: the dict is mutated, never rebound.
    for file in glob.glob(str(__GLOBAL_HOOKS_PATH / "*")):
        if not os.path.isfile(file):
            # The glob also yields sub-directories; open() on one would raise
            # IsADirectoryError and abort the whole command.
            continue
        hook_name = Path(file).stem
        with open(file) as fin:
            lines = fin.readlines()
        if not __lines_are_empty(lines):
            __HOOK_CONTENTS[hook_name] = lines
45+
46+
47+
class HookFile:
48+
prefix = "# VECTORCODE_HOOK_START"
49+
suffix = "# VECTORCODE_HOOK_END"
50+
prefix_pattern = re.compile(r"^\s*#\s*VECTORCODE_HOOK_START\s*")
51+
suffix_pattern = re.compile(r"^\s*#\s*VECTORCODE_HOOK_END\s*")
52+
53+
def __init__(self, path: str | Path, git_dir: Optional[str | Path] = None):
54+
self.path = path
55+
self.lines: list[str] = []
56+
if os.path.isfile(self.path):
57+
with open(self.path) as fin:
58+
self.lines.extend(fin.readlines())
59+
60+
def has_vectorcode_hooks(self, force: bool = False) -> bool:
61+
for start, start_line in enumerate(self.lines):
62+
if self.prefix_pattern.match(start_line) is None:
63+
continue
64+
65+
for end in range(start + 1, len(self.lines)):
66+
if self.suffix_pattern.match(self.lines[end]) is not None:
67+
if force:
68+
logger.debug("`force` cleaning existing VectorCode hooks...")
69+
new_lines = self.lines[:start] + self.lines[end + 1 :]
70+
self.lines[:] = new_lines
71+
return False
72+
logger.debug(
73+
f"Found vectorcode hook block between line {start} and {end} in {self.path}:\n{''.join(self.lines[start + 1 : end])}"
74+
)
75+
return True
76+
77+
return False
78+
79+
def inject_hook(self, content: list[str], force: bool = False):
80+
if len(self.lines) == 0 or not self.has_vectorcode_hooks(force):
81+
self.lines.append(self.prefix + "\n")
82+
self.lines.extend(i if i.endswith("\n") else i + "\n" for i in content)
83+
self.lines.append(self.suffix + "\n")
84+
with open(self.path, "w") as fin:
85+
fin.writelines(self.lines)
86+
if platform.system() != "Windows":
87+
# for unix systems, set the executable bit.
88+
curr_mode = os.stat(self.path).st_mode
89+
os.chmod(self.path, mode=curr_mode | stat.S_IXUSR)
90+
91+
92+
async def hooks(configs: Config) -> int:
    """Inject every known hook into the git repository containing the project.

    Args:
        configs: Supplies ``project_root`` (falls back to ``"."`` when
            unset) and ``force``, which is passed to ``inject_hook``.

    Returns:
        1 when ``find_project_root`` finds no ``.git`` root for the project
        directory, otherwise 0 after all hooks have been processed.
    """
    start_dir = configs.project_root or "."
    repo_root = find_project_root(start_dir, ".git")
    if repo_root is None:
        logger.error(f"{start_dir} is not inside a git repo directory!")
        return 1

    # Pick up user-defined hooks before writing anything.
    load_hooks()
    hooks_dir = os.path.join(repo_root, ".git", "hooks")
    for hook_name, hook_lines in __HOOK_CONTENTS.items():
        target = os.path.join(hooks_dir, hook_name)
        logger.info(f"Writing {hook_name} hook into {target}.")
        print(f"Processing {hook_name} hook...")
        HookFile(target, git_dir=repo_root).inject_hook(hook_lines, configs.force)
    return 0

0 commit comments

Comments
 (0)