19 changes: 19 additions & 0 deletions docker-compose.yml
@@ -57,10 +57,29 @@ services:
volumes:
- sourcebot_redis_data:/data

typesense:
image: typesense/typesense:26.0
restart: always
ports:
- "8108:8108"
environment:
- TYPESENSE_DATA_DIR=/data
- TYPESENSE_API_KEY=${TYPESENSE_API_KEY:-xyz}
- TYPESENSE_ENABLE_CORS=true
volumes:
- sourcebot_typesense_data:/data
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8108/health"]
interval: 3s
timeout: 3s
retries: 10

volumes:
sourcebot_data:
driver: local
sourcebot_postgres_data:
driver: local
sourcebot_redis_data:
driver: local
sourcebot_typesense_data:
driver: local
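For reference, the backend reaches this new container through the typesense client. A minimal connection sketch (not part of this diff; the env var names TYPESENSE_HOST, TYPESENSE_PORT, and TYPESENSE_API_KEY follow the @sourcebot/shared mock in typesense.test.ts below, and the fallbacks mirror the compose file above):

import { Client } from 'typesense';

// Assumes the typesense container defined above is reachable on localhost:8108.
const client = new Client({
    nodes: [{
        host: process.env.TYPESENSE_HOST ?? 'localhost',
        port: parseInt(process.env.TYPESENSE_PORT ?? '8108', 10),
        protocol: 'http',
    }],
    apiKey: process.env.TYPESENSE_API_KEY ?? 'xyz', // same fallback as TYPESENSE_API_KEY in docker-compose.yml
    connectionTimeoutSeconds: 2,
});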
8 changes: 8 additions & 0 deletions docs/snippets/schemas/v3/connection.schema.mdx
@@ -587,6 +587,14 @@
],
"pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
},
"username": {
"type": "string",
"description": "The username to use for authentication."
},
"password": {
"type": "string",
"description": "The password (or HTTP password) to use for authentication."
},
"projects": {
"type": "array",
"items": {
8 changes: 8 additions & 0 deletions docs/snippets/schemas/v3/gerrit.schema.mdx
@@ -18,6 +18,14 @@
],
"pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
},
"username": {
"type": "string",
"description": "The username to use for authentication."
},
"password": {
"type": "string",
"description": "The password (or HTTP password) to use for authentication."
},
"projects": {
"type": "array",
"items": {
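With the new username and password fields, a Gerrit connection can authenticate using a Gerrit HTTP password. An illustrative connection config (only url, username, password, and projects come from the schema; the "type" value and the project glob are assumptions for the example):

{
  "type": "gerrit",
  "url": "https://gerrit.example.com",
  "username": "sourcebot-bot",
  "password": "<generated HTTP password>",
  "projects": ["my-project/**"]
}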
8 changes: 8 additions & 0 deletions docs/snippets/schemas/v3/index.schema.mdx
@@ -982,6 +982,14 @@
],
"pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
},
"username": {
"type": "string",
"description": "The username to use for authentication."
},
"password": {
"type": "string",
"description": "The password (or HTTP password) to use for authentication."
},
"projects": {
"type": "array",
"items": {
4 changes: 4 additions & 0 deletions packages/backend/package.json
@@ -12,10 +12,13 @@
},
"devDependencies": {
"@types/argparse": "^2.0.16",
"@types/jest": "^30.0.0",
"@types/micromatch": "^4.0.9",
"@types/node": "^22.7.5",
"cross-env": "^7.0.3",
"jest": "^30.2.0",
"json-schema-to-typescript": "^15.0.4",
"ts-jest": "^29.4.6",
"tsc-watch": "^6.2.0",
"tsx": "^4.19.1",
"typescript": "^5.6.2",
@@ -53,6 +56,7 @@
"posthog-node": "^5.17.4",
"prom-client": "^15.1.3",
"simple-git": "^3.27.0",
"typesense": "^2.1.0",
"zod": "^3.25.74"
}
}
28 changes: 28 additions & 0 deletions packages/backend/src/api.ts
@@ -7,19 +7,22 @@ import z from 'zod';
import { ConnectionManager } from './connectionManager.js';
import { PromClient } from './promClient.js';
import { RepoIndexManager } from './repoIndexManager.js';
import { TypesenseService } from './search/typesense.js';

const logger = createLogger('api');
const PORT = 3060;

export class Api {
private server: http.Server;
private typesenseService: TypesenseService;

constructor(
promClient: PromClient,
private prisma: PrismaClient,
private connectionManager: ConnectionManager,
private repoIndexManager: RepoIndexManager,
) {
this.typesenseService = new TypesenseService();
const app = express();
app.use(express.json());
app.use(express.urlencoded({ extended: true }));
@@ -33,12 +36,37 @@ export class Api {

app.post('/api/sync-connection', this.syncConnection.bind(this));
app.post('/api/index-repo', this.indexRepo.bind(this));
app.get('/api/search/fuzzy', this.fuzzySearch.bind(this));

this.server = app.listen(PORT, () => {
logger.info(`API server is running on port ${PORT}`);
});
}

private async fuzzySearch(req: Request, res: Response) {
const schema = z.object({
q: z.string().min(1),
type: z.enum(['repo', 'file', 'commit']).optional().default('repo'),
repoId: z.string().transform(val => parseInt(val)).optional(),
});

const parsed = schema.safeParse(req.query);
if (!parsed.success) {
res.status(400).json({ error: parsed.error.message });
return;
}

const { q, type, repoId } = parsed.data;

try {
const results = await this.typesenseService.search(q, type, repoId);
res.status(200).json(results);
} catch (error) {
logger.error('Fuzzy search failed', error);
res.status(500).json({ error: 'Internal server error during search' });
}
}

private async syncConnection(req: Request, res: Response) {
const schema = z.object({
connectionId: z.number(),
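The new endpoint validates its query with the zod schema above: q is required, type defaults to repo, and repoId (a numeric string) is optional. An illustrative call against the API port defined in this file (3060), not part of the diff:

// Search for files matching "readme" within repo 42 (values are illustrative).
const res = await fetch('http://localhost:3060/api/search/fuzzy?q=readme&type=file&repoId=42');
if (!res.ok) {
    throw new Error(`fuzzy search failed with status ${res.status}`);
}
const results = await res.json();
console.log(results);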
28 changes: 28 additions & 0 deletions packages/backend/src/git.ts
@@ -297,4 +297,32 @@ export const getCommitHashForRefName = async ({
logger.debug(error);
return undefined;
}
}

export const getAllFiles = async (path: string) => {
const git = createGitClientForPath(path);
try {
const files = await git.raw(['ls-tree', '-r', '--name-only', 'HEAD']);
return files.split('\n').filter(f => f.length > 0);
} catch (err) {
logger.error(`Failed to get files for ${path}`, err);
return [];
}
}

export const getRecentCommits = async (path: string, limit = 100) => {
const git = createGitClientForPath(path);
try {
const log = await git.log({ maxCount: limit });
return log.all.map(commit => ({
hash: commit.hash,
date: new Date(commit.date).getTime(),
message: commit.message,
author_name: commit.author_name,
author_email: commit.author_email,
}));
} catch (err) {
logger.error(`Failed to get commits for ${path}`, err);
return [];
}
}
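Both helpers operate on an existing on-disk clone and return empty arrays on failure, so callers need no extra error handling. A brief usage sketch (the clone path is hypothetical):

import { getAllFiles, getRecentCommits } from './git.js';

const repoPath = '/data/repos/org/repo';        // hypothetical clone location
const files = await getAllFiles(repoPath);      // paths from `git ls-tree -r --name-only HEAD`
const commits = await getRecentCommits(repoPath, 50);
console.log(`${files.length} files, ${commits.length} recent commits`);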
22 changes: 20 additions & 2 deletions packages/backend/src/repoIndexManager.ts
@@ -8,9 +8,10 @@ import { Job, Queue, ReservedJob, Worker } from "groupmq";
import { Redis } from 'ioredis';
import micromatch from 'micromatch';
import { GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS, INDEX_CACHE_DIR } from './constants.js';
import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName, getTags, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
import { cloneRepository, fetchRepository, getAllFiles, getBranches, getCommitHashForRefName, getRecentCommits, getTags, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
import { captureEvent } from './posthog.js';
import { PromClient } from './promClient.js';
import { TypesenseService } from './search/typesense.js';
import { RepoWithConnections, Settings } from "./types.js";
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
import { indexGitRepository } from './zoekt.js';
@@ -41,13 +42,15 @@ export class RepoIndexManager {
private interval?: NodeJS.Timeout;
private queue: Queue<JobPayload>;
private worker: Worker<JobPayload>;
private typesenseService: TypesenseService;

constructor(
private db: PrismaClient,
private settings: Settings,
private redis: Redis,
private promClient: PromClient,
) {
this.typesenseService = new TypesenseService();
this.queue = new Queue<JobPayload>({
redis,
namespace: 'repo-index-queue',
@@ -242,7 +245,7 @@ export class RepoIndexManager {
status: true,
}
});

// Fail safe: if the job is not PENDING (first run) or IN_PROGRESS (retry), it indicates the job
// is in an invalid state and should be skipped.
if (
@@ -440,6 +443,21 @@
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);

// Trigger Typesense indexing asynchronously (don't block main flow or fail job if it fails)
Promise.all([
this.typesenseService.indexRepo(repo),
(async () => {
const files = await getAllFiles(repoPath);
await this.typesenseService.indexFiles(repo.id, files);
})(),
(async () => {
const commits = await getRecentCommits(repoPath);
await this.typesenseService.indexCommits(repo.id, commits);
})()
]).catch(err => {
logger.error(`Failed to index metadata for ${repo.name} in Typesense`, err);
});

return revisions;
}

140 changes: 140 additions & 0 deletions packages/backend/src/search/typesense.test.ts
@@ -0,0 +1,140 @@
import { TypesenseService } from './typesense';
import { describe, it, expect, vi, beforeEach } from 'vitest';

// Mock typesense client
vi.mock('typesense', () => {
return {
Client: vi.fn().mockImplementation(() => ({
collections: vi.fn().mockReturnThis(),
retrieve: vi.fn().mockResolvedValue([]),
create: vi.fn().mockResolvedValue({}),
documents: vi.fn().mockReturnThis(),
upsert: vi.fn().mockResolvedValue({}),
import: vi.fn().mockResolvedValue([]),
search: vi.fn().mockResolvedValue({ hits: [] }),
}))
};
});

// Mock shared env
vi.mock('@sourcebot/shared', () => ({
createLogger: () => ({
info: vi.fn(),
error: vi.fn(),
debug: vi.fn(),
}),
env: {
TYPESENSE_HOST: 'localhost',
TYPESENSE_PORT: '8108',
TYPESENSE_API_KEY: 'xyz'
}
}));

describe('TypesenseService', () => {
let service: TypesenseService;
let mockClient: any;

beforeEach(() => {
// Clear all mocks
vi.clearAllMocks();
service = new TypesenseService();
mockClient = (service as any).client;
});

it('should initialize and create collections if they do not exist', async () => {
// Mock retrieve to return empty array (no collections exist)
mockClient.collections().retrieve.mockResolvedValue([]);

await service.initialize();

expect(mockClient.collections().create).toHaveBeenCalledTimes(3);
expect(mockClient.collections().create).toHaveBeenCalledWith(expect.objectContaining({ name: 'repos' }));
expect(mockClient.collections().create).toHaveBeenCalledWith(expect.objectContaining({ name: 'files' }));
expect(mockClient.collections().create).toHaveBeenCalledWith(expect.objectContaining({ name: 'commits' }));
});

it('should not create collections if they already exist', async () => {
// Mock retrieve to return existing collections
mockClient.collections().retrieve.mockResolvedValue([
{ name: 'repos' },
{ name: 'files' },
{ name: 'commits' }
]);

await service.initialize();

expect(mockClient.collections().create).not.toHaveBeenCalled();
});

it('should index a repo', async () => {
const repo: any = {
id: 1,
name: 'org/repo',
cloneUrl: 'https://github.com/org/repo.git'
};

await service.indexRepo(repo);

expect(mockClient.collections).toHaveBeenCalledWith('repos');
expect(mockClient.documents).toHaveBeenCalled();
expect(mockClient.upsert).toHaveBeenCalledWith(expect.objectContaining({
id: 'repo_1',
repo_id: 1,
name: 'org/repo',
organization: 'org'
}));
});

it('should index files in batches', async () => {
const filePaths = ['src/index.ts', 'package.json'];
await service.indexFiles(1, filePaths);

expect(mockClient.collections).toHaveBeenCalledWith('files');
expect(mockClient.import).toHaveBeenCalledWith([
expect.objectContaining({ filename: 'index.ts', path: 'src/index.ts', extension: 'ts' }),
expect.objectContaining({ filename: 'package.json', path: 'package.json', extension: 'json' })
], { action: 'upsert' });
});

it('should index commits', async () => {
const commits = [{
hash: 'abc1234',
message: 'feat: typesense',
author_name: 'Dev',
author_email: 'dev@example.com',
date: 1234567890
}];

await service.indexCommits(1, commits);

expect(mockClient.collections).toHaveBeenCalledWith('commits');
expect(mockClient.import).toHaveBeenCalledWith([
expect.objectContaining({
id: 'commit_1_abc1234',
hash: 'abc1234',
message: 'feat: typesense'
})
], { action: 'upsert' });
});

it('should search repos', async () => {
await service.search('query', 'repo');

expect(mockClient.collections).toHaveBeenCalledWith('repos');
expect(mockClient.search).toHaveBeenCalledWith(expect.objectContaining({
q: 'query',
query_by: 'name,organization'
}));
});

it('should search files with repoId filter', async () => {
await service.search('query', 'file', 123);

expect(mockClient.collections).toHaveBeenCalledWith('files');
expect(mockClient.search).toHaveBeenCalledWith(expect.objectContaining({
q: 'query',
query_by: 'filename,path',
filter_by: 'repo_id:=123'
}));
});
});
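The TypesenseService implementation itself is not included in this section, but the tests above pin down its shape: collections named repos, files, and commits; file documents with filename, path, and extension imported with { action: 'upsert' }; and searches that use name,organization for repos and filename,path (plus an optional repo_id filter) for files. A minimal sketch consistent with those expectations (the id scheme, client configuration, and commit query fields are assumptions):

import { Client } from 'typesense';

// Sketch only -- inferred from the tests above, not the actual implementation.
export class TypesenseService {
    private client = new Client({
        nodes: [{ host: 'localhost', port: 8108, protocol: 'http' }], // real values would come from env
        apiKey: 'xyz',
    });

    async indexFiles(repoId: number, paths: string[]) {
        const docs = paths.map(path => ({
            id: `file_${repoId}_${path}`, // id scheme is an assumption
            repo_id: repoId,
            path,
            filename: path.split('/').pop() ?? path,
            extension: path.includes('.') ? (path.split('.').pop() ?? '') : '',
        }));
        // The tests assert a batched import with { action: 'upsert' }.
        await this.client.collections('files').documents().import(docs, { action: 'upsert' });
    }

    async search(q: string, type: 'repo' | 'file' | 'commit', repoId?: number) {
        const collection = type === 'repo' ? 'repos' : type === 'file' ? 'files' : 'commits';
        const queryBy =
            type === 'repo' ? 'name,organization' :
            type === 'file' ? 'filename,path' :
            'message,author_name'; // commit fields are an assumption
        return this.client.collections(collection).documents().search({
            q,
            query_by: queryBy,
            ...(repoId !== undefined ? { filter_by: `repo_id:=${repoId}` } : {}),
        });
    }
}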