From 08e03603d1d1b03249c4f20572c00fe3036c4ed1 Mon Sep 17 00:00:00 2001 From: elijahpetty Date: Fri, 23 Jan 2026 13:51:15 -0600 Subject: [PATCH] . --- README.md | 325 +++++++++++------- data/batch/file1.csv | 4 + data/batch/file2.csv | 4 + data/batch/file3.csv | 5 + data/sample.csv | 11 + my_dh_cli/README.md | 57 +++ my_dh_cli/pyproject.toml | 20 ++ my_dh_cli/src/my_dh_cli.egg-info/PKG-INFO | 59 ++++ my_dh_cli/src/my_dh_cli.egg-info/SOURCES.txt | 11 + .../my_dh_cli.egg-info/dependency_links.txt | 1 + .../src/my_dh_cli.egg-info/entry_points.txt | 2 + my_dh_cli/src/my_dh_cli.egg-info/requires.txt | 2 + .../src/my_dh_cli.egg-info/top_level.txt | 1 + my_dh_cli/src/my_dh_cli/__init__.py | 3 + my_dh_cli/src/my_dh_cli/__main__.py | 4 + .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 267 bytes .../my_dh_cli/__pycache__/cli.cpython-312.pyc | Bin 0 -> 1606 bytes my_dh_cli/src/my_dh_cli/cli.py | 31 ++ my_dh_library/README.md | 55 +++ my_dh_library/pyproject.toml | 16 + .../src/my_dh_library.egg-info/PKG-INFO | 55 +++ .../src/my_dh_library.egg-info/SOURCES.txt | 10 + .../dependency_links.txt | 1 + .../src/my_dh_library.egg-info/requires.txt | 1 + .../src/my_dh_library.egg-info/top_level.txt | 1 + my_dh_library/src/my_dh_library/__init__.py | 7 + .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 427 bytes .../__pycache__/queries.cpython-312.pyc | Bin 0 -> 1355 bytes my_dh_library/src/my_dh_library/queries.py | 30 ++ my_dh_library/src/my_dh_library/utils.py | 18 + my_dh_toolkit/README.md | 104 ++++++ my_dh_toolkit/pyproject.toml | 21 ++ my_dh_toolkit/src/my_dh_toolkit/__init__.py | 8 + my_dh_toolkit/src/my_dh_toolkit/__main__.py | 4 + my_dh_toolkit/src/my_dh_toolkit/cli.py | 31 ++ my_dh_toolkit/src/my_dh_toolkit/processor.py | 40 +++ my_dh_toolkit/src/my_dh_toolkit/queries.py | 30 ++ my_dh_toolkit/src/my_dh_toolkit/utils.py | 18 + 38 files changed, 871 insertions(+), 119 deletions(-) create mode 100644 data/batch/file1.csv create mode 100644 data/batch/file2.csv create mode 
100644 data/batch/file3.csv create mode 100644 data/sample.csv create mode 100644 my_dh_cli/README.md create mode 100644 my_dh_cli/pyproject.toml create mode 100644 my_dh_cli/src/my_dh_cli.egg-info/PKG-INFO create mode 100644 my_dh_cli/src/my_dh_cli.egg-info/SOURCES.txt create mode 100644 my_dh_cli/src/my_dh_cli.egg-info/dependency_links.txt create mode 100644 my_dh_cli/src/my_dh_cli.egg-info/entry_points.txt create mode 100644 my_dh_cli/src/my_dh_cli.egg-info/requires.txt create mode 100644 my_dh_cli/src/my_dh_cli.egg-info/top_level.txt create mode 100644 my_dh_cli/src/my_dh_cli/__init__.py create mode 100644 my_dh_cli/src/my_dh_cli/__main__.py create mode 100644 my_dh_cli/src/my_dh_cli/__pycache__/__init__.cpython-312.pyc create mode 100644 my_dh_cli/src/my_dh_cli/__pycache__/cli.cpython-312.pyc create mode 100644 my_dh_cli/src/my_dh_cli/cli.py create mode 100644 my_dh_library/README.md create mode 100644 my_dh_library/pyproject.toml create mode 100644 my_dh_library/src/my_dh_library.egg-info/PKG-INFO create mode 100644 my_dh_library/src/my_dh_library.egg-info/SOURCES.txt create mode 100644 my_dh_library/src/my_dh_library.egg-info/dependency_links.txt create mode 100644 my_dh_library/src/my_dh_library.egg-info/requires.txt create mode 100644 my_dh_library/src/my_dh_library.egg-info/top_level.txt create mode 100644 my_dh_library/src/my_dh_library/__init__.py create mode 100644 my_dh_library/src/my_dh_library/__pycache__/__init__.cpython-312.pyc create mode 100644 my_dh_library/src/my_dh_library/__pycache__/queries.cpython-312.pyc create mode 100644 my_dh_library/src/my_dh_library/queries.py create mode 100644 my_dh_library/src/my_dh_library/utils.py create mode 100644 my_dh_toolkit/README.md create mode 100644 my_dh_toolkit/pyproject.toml create mode 100644 my_dh_toolkit/src/my_dh_toolkit/__init__.py create mode 100644 my_dh_toolkit/src/my_dh_toolkit/__main__.py create mode 100644 my_dh_toolkit/src/my_dh_toolkit/cli.py create mode 100644 
my_dh_toolkit/src/my_dh_toolkit/processor.py create mode 100644 my_dh_toolkit/src/my_dh_toolkit/queries.py create mode 100644 my_dh_toolkit/src/my_dh_toolkit/utils.py diff --git a/README.md b/README.md index c12bb06..dbaf300 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,31 @@ # Python Packaging with Deephaven -This example demonstrates how to create and deploy Python packages that use Deephaven. It shows you how to package both command-line tools and reusable libraries using modern Python packaging standards. +This repository demonstrates how to create and deploy Python packages that use Deephaven. It shows three complete packaging scenarios following the official [Python Packaging User Guide](https://packaging.python.org/en/latest/guides/writing-pyproject-toml/) recommendations. This example accompanies the [Packaging custom code and dependencies](https://deephaven.io/core/docs/how-to-guides/sysadmin/setuptools-deployment/) guide in the Deephaven documentation. ## What you'll learn -This example shows you how to: - - Create installable Python packages with Deephaven dependencies -- Build command-line tools that process data with Deephaven - Package reusable library code for other projects +- Build command-line tools with entry point scripts - Manage dependencies with `pyproject.toml` +- Use the src-layout structure - Distribute packages as wheel archives -## Project structure +## Prerequisites + +- Python 3.8 or later +- pip (Python package installer) +- Basic familiarity with Python packaging + +## Repository structure -The example includes three complete packaging scenarios: +This repository contains three complete packaging scenarios: ### 1. Library-only package (`my_dh_library/`) -A reusable library with Deephaven query functions that other projects can import. +Reusable library code without CLI tools. Other projects import your modules. 
``` my_dh_library/ @@ -33,68 +38,74 @@ my_dh_library/ └── README.md ``` +**Usage:** +```python +from my_dh_library.queries import filter_by_threshold +``` + ### 2. CLI-only package (`my_dh_cli/`) -Command-line tools for processing data with Deephaven. +Command-line tool without exposing library code. ``` my_dh_cli/ ├── src/ -│ └── my_dh_package/ +│ └── my_dh_cli/ │ ├── __init__.py │ ├── __main__.py -│ ├── cli.py -│ └── processor.py +│ └── cli.py ├── pyproject.toml -├── data/ -│ └── sample.csv └── README.md ``` +**Usage:** +```python +# Use within a Python session with server running +from my_dh_cli.cli import my_dh_query +result = my_dh_query("input_data.csv", verbose=True) +``` + ### 3. Combined package (`my_dh_toolkit/`) -Both reusable library code and command-line tools in one package. +Both reusable library code and command-line tools. ``` my_dh_toolkit/ ├── src/ -│ └── my_dh_package/ +│ └── my_dh_toolkit/ │ ├── __init__.py │ ├── __main__.py │ ├── cli.py +│ ├── processor.py │ ├── queries.py │ └── utils.py ├── pyproject.toml └── README.md ``` -## Prerequisites +**Usage:** +```python +# As a library +from my_dh_toolkit.queries import filter_by_threshold +``` -- Python 3.8 or later -- pip (Python package installer) -- Basic familiarity with Python packaging +```python +# As CLI functions (within Python session) +from my_dh_toolkit import my_dh_query, batch_process +result = my_dh_query("input_data.csv", verbose=True) +batch_process("data/", "results/", verbose=True) +``` ## Quick start Clone the repository: ```shell -git clone https://github.com/deephaven-examples/python-packaging.git -cd python-packaging +git clone https://github.com/deephaven-examples/deephaven-python-packaging.git +cd deephaven-python-packaging ``` -Choose an example to try: - -### Try the CLI package - -```shell -cd my_dh_cli -pip install -e . 
-my-dh-query data/sample.csv --verbose -my-dh-process data/ --output results/ -``` - -### Try the library package +### Try the library-only package ```shell cd my_dh_library @@ -105,173 +116,249 @@ python Then in Python: ```python +# Start the Deephaven server +from deephaven_server import Server +server = Server(port=10000, jvm_args=["-Xmx4g"]) +server.start() + +# Now use the library functions from my_dh_library.queries import filter_by_threshold from deephaven import read_csv data = read_csv("../data/sample.csv") filtered = filter_by_threshold(data, "Score", 75.0) -print(f"Filtered to {filtered.size} rows") +``` + +### Try the CLI-only package + +> [!NOTE] +> CLI tools require a Deephaven server running in the same Python process. The examples below show how to use the CLI functions within a Python session where the server is already started. True standalone CLI commands (run from a separate terminal) are not practical with Deephaven due to JVM initialization requirements. + +```shell +cd my_dh_cli +pip install -e . +python +``` + +Then in Python: + +```python +# Start the Deephaven server +from deephaven_server import Server +server = Server(port=10000, jvm_args=["-Xmx4g"]) +server.start() + +# Now use the CLI function +from my_dh_cli.cli import my_dh_query +result = my_dh_query("../data/sample.csv", verbose=True) +print(f"Processed {result.size} rows") ``` ### Try the combined package +> [!NOTE] +> Like the CLI-only package, the CLI commands require a Deephaven server in the same Python process. Use the library functions within a Python session. + ```shell cd my_dh_toolkit pip install -e . 
+python +``` + +Then in Python: + +```python +# Start the Deephaven server +from deephaven_server import Server +server = Server(port=10000, jvm_args=["-Xmx4g"]) +server.start() # Use as a library -python -c "from my_dh_toolkit.queries import filter_by_threshold; print('Library imported successfully')" +from my_dh_toolkit.queries import filter_by_threshold +from deephaven import read_csv -# Use as CLI tools -my-dh-query ../data/sample.csv -my-dh-process ../data/ --output results/ +data = read_csv("../data/sample.csv") +filtered = filter_by_threshold(data, "Score", 75.0) + +# Or use the CLI functions +from my_dh_toolkit import my_dh_query, batch_process +result = my_dh_query("../data/sample.csv", verbose=True) +batch_process("../data/batch/", "./output", verbose=True) ``` -## What's included +## Sample data -### Command-line tools +The `data/` directory contains sample CSV files for testing: -The CLI examples demonstrate: +- `sample.csv` - Single file with Name, Score, Value, and Category columns +- `batch/` - Multiple CSV files for batch processing examples -- **Entry point scripts** - Commands installed to your PATH -- **Module execution** - Running with `python -m package_name` -- **Argument parsing** - Using Click for robust CLI interfaces -- **Multiple commands** - Single package with multiple tools -- **Verbose output** - Optional detailed logging +## Key concepts + +### Package structure -### Library modules +All examples use the **src-layout**, which is the recommended structure for Python packages: -The library examples show: +``` +my_project/ +├── src/ +│ └── my_package/ +│ ├── __init__.py +│ └── module.py +├── pyproject.toml +└── README.md +``` -- **Reusable query functions** - Common Deephaven operations -- **Type hints** - Proper function signatures -- **Public API exports** - Clean import patterns -- **Documentation** - Docstrings for all functions +The src-layout keeps source code separate from tests and configuration files. 
-### Configuration +### Entry point scripts -All examples include: +Entry point scripts are defined in `[project.scripts]` and become available after installation: -- **`pyproject.toml`** - Modern Python packaging configuration -- **Dependency management** - Automatic installation of Deephaven and other requirements -- **Version constraints** - Ensuring compatible package versions -- **Entry points** - Mapping command names to Python functions +```toml +[project.scripts] +my-command = "my_package.module:function" +``` -## Building and distributing +After `pip install`, you can run `my-command` from anywhere. -Each example can be built into a distributable wheel: +### Module execution -```shell -cd my_dh_cli # or any example directory -pip install build -python -m build -``` +Add a `__main__.py` file to support running packages with `python -m`: -This creates a `.whl` file in the `dist/` directory that can be: +```python +from my_package.cli import app -- Installed locally: `pip install dist/my_dh_cli-0.1.0-py3-none-any.whl` -- Distributed to others -- Published to PyPI: `python -m twine upload dist/*` +if __name__ == "__main__": + app() +``` -## Running the examples +This allows running without installation: `python -m my_package` -### Development mode +### Dependencies -Install in editable mode to make changes without reinstalling: +Dependencies are specified in `pyproject.toml`: -```shell -pip install -e . +```toml +[project] +dependencies = [ + "deephaven-server>=0.35.0", + "click>=8.0.0", +] ``` -### Regular installation +These are automatically installed when users install your package. 
-Install from the built wheel: +## Building and distributing + +Build a distributable wheel: ```shell -pip install dist/package_name-0.1.0-py3-none-any.whl +cd my_dh_cli # or any package directory +pip install build +python -m build ``` -### Without installation +This creates a `.whl` file in `dist/` that can be: -Run directly from source using module execution: +- Installed locally: `pip install dist/my_dh_cli-0.1.0-py3-none-any.whl` +- Distributed to others +- Published to PyPI: `python -m twine upload dist/*` -```shell -python -m my_dh_package input_data.csv -``` +## Packaging scenarios -## Sample data +### When to use library-only -The `data/` directory contains sample CSV files for testing: +- Creating reusable code for other projects +- No command-line interface needed +- Code will be imported, not executed -- `sample.csv` - Small dataset with Name, Age, and Score columns -- `batch/` - Multiple CSV files for batch processing examples +**Example:** Data processing utilities, query functions, helper classes -You can use your own CSV files with these examples. +### When to use CLI-only -## Key concepts +- Building command-line tools for end users +- No library code to expose +- Want clean command names -### Entry point scripts vs module execution +**Example:** Data conversion tools, file processors, automation scripts -The examples demonstrate two ways to run Python packages: +### When to use combined -1. **Entry point scripts** - Commands defined in `[project.scripts]` that become available after installation - ```shell - my-dh-query data.csv - ``` +- Need both library and CLI functionality +- Want to provide multiple interfaces +- Library functions useful on their own -2. 
**Module execution** - Running packages with `python -m` without installation - ```shell - python -m my_dh_package data.csv - ``` +**Example:** Data analysis toolkit with both API and CLI -See the [Execution patterns](https://deephaven.io/core/docs/how-to-guides/sysadmin/setuptools-deployment/#execution-patterns) section of the guide for when to use each method. +## Execution patterns -### Package structure +### Entry point scripts (recommended for CLI tools) -All examples use the **src-layout**, which is the recommended structure for Python packages. This keeps source code separate from tests and configuration files. +**Configure in `pyproject.toml`:** +```toml +[project.scripts] +my-command = "my_package.module:function" +``` -### Dependencies +**Run after installation:** +```shell +my-command +``` -The examples show how to: +**Benefits:** +- Clean command names +- Available system-wide +- Standard Python packaging approach -- Specify required packages (like `deephaven-server`) -- Set version constraints -- Define optional dependencies for features like visualization or testing +### Module execution (useful for development) -## Related documentation +**Add `__main__.py`:** +```python +from my_package.cli import app -- [Packaging custom code and dependencies](https://deephaven.io/core/docs/how-to-guides/sysadmin/setuptools-deployment/) - Complete guide -- [Install and use Python packages](https://deephaven.io/core/docs/how-to-guides/install-and-use-python-packages/) -- [Use the Deephaven Python package](https://deephaven.io/core/docs/how-to-guides/deephaven-python-package/) -- [Python Packaging User Guide](https://packaging.python.org/en/latest/guides/writing-pyproject-toml/) -- [Click documentation](https://click.palletsprojects.com/) +if __name__ == "__main__": + app() +``` + +**Run without installation:** +```shell +python -m my_package +``` + +**Benefits:** +- No installation required +- Useful for development and testing +- Works from source directory ## 
Troubleshooting ### Command not found after installation -If your command isn't found after installation: - -- Ensure the installation completed without errors +- Ensure installation completed without errors - Check that the installation directory is in your PATH - Try reinstalling: `pip install --force-reinstall .` ### Import errors -If you encounter import errors: - - Verify all dependencies are installed: `pip list` - Check that you're using Python 3.8 or later - Ensure Deephaven is installed: `pip install deephaven-server` ### Module not found errors -If Python can't find your modules: - - Verify `__init__.py` files exist in all package directories - Check that package names in `[project.scripts]` match your directory structure - Try reinstalling in editable mode: `pip install -e .` +## Related documentation + +- [Packaging custom code and dependencies](https://deephaven.io/core/docs/how-to-guides/sysadmin/setuptools-deployment/) - Complete guide +- [Install and use Python packages](https://deephaven.io/core/docs/how-to-guides/install-and-use-python-packages/) +- [Use the Deephaven Python package](https://deephaven.io/core/docs/how-to-guides/deephaven-python-package/) +- [Python Packaging User Guide](https://packaging.python.org/en/latest/guides/writing-pyproject-toml/) +- [Click documentation](https://click.palletsprojects.com/) + ## Note The code in this repository is built for Deephaven Community Core v0.35.0 or later. For the latest Deephaven version, see [deephaven.io](https://deephaven.io/). 
diff --git a/data/batch/file1.csv b/data/batch/file1.csv new file mode 100644 index 0000000..fe9fae6 --- /dev/null +++ b/data/batch/file1.csv @@ -0,0 +1,4 @@ +Name,Score,Value,Category +Alice,85,120,A +Bob,92,150,B +Charlie,78,95,A diff --git a/data/batch/file2.csv b/data/batch/file2.csv new file mode 100644 index 0000000..e270a59 --- /dev/null +++ b/data/batch/file2.csv @@ -0,0 +1,4 @@ +Name,Score,Value,Category +Diana,88,110,C +Eve,95,180,B +Frank,72,85,A diff --git a/data/batch/file3.csv b/data/batch/file3.csv new file mode 100644 index 0000000..692c4a2 --- /dev/null +++ b/data/batch/file3.csv @@ -0,0 +1,5 @@ +Name,Score,Value,Category +Grace,91,160,C +Henry,83,105,B +Iris,89,140,A +Jack,76,90,C diff --git a/data/sample.csv b/data/sample.csv new file mode 100644 index 0000000..1d1b856 --- /dev/null +++ b/data/sample.csv @@ -0,0 +1,11 @@ +Name,Score,Value,Category +Alice,85,120,A +Bob,92,150,B +Charlie,78,95,A +Diana,88,110,C +Eve,95,180,B +Frank,72,85,A +Grace,91,160,C +Henry,83,105,B +Iris,89,140,A +Jack,76,90,C diff --git a/my_dh_cli/README.md b/my_dh_cli/README.md new file mode 100644 index 0000000..8e79176 --- /dev/null +++ b/my_dh_cli/README.md @@ -0,0 +1,57 @@ +# My Deephaven CLI + +A CLI-only package providing command-line tools for data processing with Deephaven. This package is designed to be installed and run as a command-line tool. + +## Installation + +```shell +pip install . +``` + +Or in editable mode for development: + +```shell +pip install -e . +``` + +## Usage + +> [!NOTE] +> CLI functions require a Deephaven server running in the same Python process. Use the functions within a Python session where the server is already started. + +```shell +pip install -e . 
+python +``` + +Then in Python: + +```python +# Start the Deephaven server +from deephaven_server import Server +server = Server(port=10000, jvm_args=["-Xmx4g"]) +server.start() + +# Now use the CLI function +from my_dh_cli.cli import my_dh_query +result = my_dh_query("../data/sample.csv", verbose=True) +print(f"Processed {result.size} rows") +``` + +## Commands + +### my-dh-query + +Process a CSV file with Deephaven. + +**Arguments:** +- `input_file` - Path to the CSV file to process + +**Options:** +- `--verbose, -v` - Enable verbose output + +## Requirements + +- Python 3.8 or later +- Deephaven Server 0.35.0 or later +- Click 8.0.0 or later diff --git a/my_dh_cli/pyproject.toml b/my_dh_cli/pyproject.toml new file mode 100644 index 0000000..32c7f84 --- /dev/null +++ b/my_dh_cli/pyproject.toml @@ -0,0 +1,20 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "my_dh_cli" +version = "0.1.0" +description = "Command-line tool for data processing" +readme = "README.md" +requires-python = ">=3.8" +dependencies = [ + "deephaven-server>=0.35.0", + "click>=8.0.0", +] + +[project.scripts] +my-dh-query = "my_dh_cli.cli:app" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/my_dh_cli/src/my_dh_cli.egg-info/PKG-INFO b/my_dh_cli/src/my_dh_cli.egg-info/PKG-INFO new file mode 100644 index 0000000..88f54ca --- /dev/null +++ b/my_dh_cli/src/my_dh_cli.egg-info/PKG-INFO @@ -0,0 +1,59 @@ +Metadata-Version: 2.4 +Name: my_dh_cli +Version: 0.1.0 +Summary: Command-line tool for data processing +Requires-Python: >=3.8 +Description-Content-Type: text/markdown +Requires-Dist: deephaven-server>=0.35.0 +Requires-Dist: click>=8.0.0 + +# My Deephaven CLI + +A CLI-only package providing command-line tools for data processing with Deephaven. This package is designed to be installed and run as a command-line tool. + +## Installation + +```shell +pip install . 
+``` + +Or in editable mode for development: + +```shell +pip install -e . +``` + +## Usage + +After installation, use the command-line tool: + +```shell +my-dh-query input_data.csv +my-dh-query input_data.csv --verbose +``` + +### Module Execution + +You can also run without installation using module execution: + +```shell +python -m my_dh_cli input_data.csv +``` + +## Commands + +### my-dh-query + +Process a CSV file with Deephaven. + +**Arguments:** +- `input_file` - Path to the CSV file to process + +**Options:** +- `--verbose, -v` - Enable verbose output + +## Requirements + +- Python 3.8 or later +- Deephaven Server 0.35.0 or later +- Click 8.0.0 or later diff --git a/my_dh_cli/src/my_dh_cli.egg-info/SOURCES.txt b/my_dh_cli/src/my_dh_cli.egg-info/SOURCES.txt new file mode 100644 index 0000000..03fe790 --- /dev/null +++ b/my_dh_cli/src/my_dh_cli.egg-info/SOURCES.txt @@ -0,0 +1,11 @@ +README.md +pyproject.toml +src/my_dh_cli/__init__.py +src/my_dh_cli/__main__.py +src/my_dh_cli/cli.py +src/my_dh_cli.egg-info/PKG-INFO +src/my_dh_cli.egg-info/SOURCES.txt +src/my_dh_cli.egg-info/dependency_links.txt +src/my_dh_cli.egg-info/entry_points.txt +src/my_dh_cli.egg-info/requires.txt +src/my_dh_cli.egg-info/top_level.txt \ No newline at end of file diff --git a/my_dh_cli/src/my_dh_cli.egg-info/dependency_links.txt b/my_dh_cli/src/my_dh_cli.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/my_dh_cli/src/my_dh_cli.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/my_dh_cli/src/my_dh_cli.egg-info/entry_points.txt b/my_dh_cli/src/my_dh_cli.egg-info/entry_points.txt new file mode 100644 index 0000000..d94621b --- /dev/null +++ b/my_dh_cli/src/my_dh_cli.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +my-dh-query = my_dh_cli.cli:app diff --git a/my_dh_cli/src/my_dh_cli.egg-info/requires.txt b/my_dh_cli/src/my_dh_cli.egg-info/requires.txt new file mode 100644 index 0000000..18973ba --- /dev/null +++ 
b/my_dh_cli/src/my_dh_cli.egg-info/requires.txt @@ -0,0 +1,2 @@ +deephaven-server>=0.35.0 +click>=8.0.0 diff --git a/my_dh_cli/src/my_dh_cli.egg-info/top_level.txt b/my_dh_cli/src/my_dh_cli.egg-info/top_level.txt new file mode 100644 index 0000000..5ee37e7 --- /dev/null +++ b/my_dh_cli/src/my_dh_cli.egg-info/top_level.txt @@ -0,0 +1 @@ +my_dh_cli diff --git a/my_dh_cli/src/my_dh_cli/__init__.py b/my_dh_cli/src/my_dh_cli/__init__.py new file mode 100644 index 0000000..cd9785f --- /dev/null +++ b/my_dh_cli/src/my_dh_cli/__init__.py @@ -0,0 +1,3 @@ +"""My Deephaven package for data processing.""" + +__version__ = "0.1.0" diff --git a/my_dh_cli/src/my_dh_cli/__main__.py b/my_dh_cli/src/my_dh_cli/__main__.py new file mode 100644 index 0000000..ff7364e --- /dev/null +++ b/my_dh_cli/src/my_dh_cli/__main__.py @@ -0,0 +1,4 @@ +from my_dh_cli.cli import app + +if __name__ == "__main__": + app() diff --git a/my_dh_cli/src/my_dh_cli/__pycache__/__init__.cpython-312.pyc b/my_dh_cli/src/my_dh_cli/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29c5838225023eddf3bbe04c328e8a21fd7c23bb GIT binary patch literal 267 zcmX@j%ge<81a+?qGTnjnV-N=h7@>^MJV3^Dh7^V_$Jha<^>WT Vm>C%vZ!+jV;1g}+E@B0W008cWPgMW_ literal 0 HcmV?d00001 diff --git a/my_dh_cli/src/my_dh_cli/__pycache__/cli.cpython-312.pyc b/my_dh_cli/src/my_dh_cli/__pycache__/cli.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d23a93f7e28e8e181d948dc9fd9185805e89af3 GIT binary patch literal 1606 zcmah}&2QX96rZuZUhh}ZkVcRwDQ=ol7HKz7!X=afN|QrrtBMqm0+Ho-W_P{u+GfU1 zS!*M0L{ZaIb8DmuX?to=a^T8;fW&1*f>cZotq=!JECg}ri5c&jO{+?btoPo$H}C!4 zy!XaG7VIAg={muhk3Dc(S`kq!YEgUA-Fj;XZHhO7A zkO@Xv0m0$-jId%sBv_yZ*98a~kc@iV9<8bzOMQ;S2Dl55^tBjY8Ug_{ga(NB>taK! 
z#trE^0+cAjeyP?ahIQ(Hu#yd_`kZDkeB~&9Ls|s+RPg?XSVyX^&Mkhd^4qJrjZ_zt zl0#}tP0gCil%M;U)K!P+b|Xt4R0sBH?Aq!%!hCl4j7gAaj*C2By@s%W5DN zi^br@3y!~H;YGtCSbazB^4HX}+>ImomQI6o&kZ9rNT|fQPV;h@LKwRheGS`$7dA{7 zmT9bp5;iJMDElr;0f!Pb1DsC~-XOw9q zs$Oxi=hdhG11h+6uj1GRS2tGmGP}fdt*)Vp#*n8eF^=q*rs&qgOxGS2i5z1fJO%n0 zBTev|H2F9=@KDL!PTxvzecBppWq0P=DGZ#>S8Y*qJjCz}h8a>KWmZ!Nb^pKnjS zyScodd#|~0#A~S~-_P93Y_IRkw};OjC~rQ>3^gwwviRT{tDua(FsVnGBzbig2dP4# zH^G5WSPMqxZJpI#?dep<_t>cCg)nJStz_xtP^w_d0Vm~$Y{36lRFEHT_@On=c zfCiiT_}Q)t9zPSUD))G1AsA0Ndvrox490tnj^;H$CoBuQ@-?HYCLB>N-cU|%llSa5{f{`ZpI}HX#x5rwa?B@#2g@;n= z=EaSR?ZMMONw1J0?xbj1O*d^#%fp3yI>HJ?>XA@daU3g3AWX8HYV47OicZRY4clHQ zUDdrxC_65H8xbgvWa~AoY2Sb-bd=N+;CM$*>A5MoSHp=Z$E%JW*XlZ z-(20E**V^x`QQLv>Ifn{v&G^u)|!gg&dgtYcPIkkGUW6eCqo^XDLYvpWjBK##nGRo O None: + """Process data with Deephaven.""" + result = my_dh_query(input_file, verbose) + click.echo("Processing complete!") + + +if __name__ == "__main__": + app() diff --git a/my_dh_library/README.md b/my_dh_library/README.md new file mode 100644 index 0000000..d275e18 --- /dev/null +++ b/my_dh_library/README.md @@ -0,0 +1,55 @@ +# My Deephaven Library + +A library-only package providing reusable Deephaven query functions. This package contains no CLI tools - it's designed to be imported and used as a library in other Python projects. + +## Installation + +```shell +pip install . +``` + +Or in editable mode for development: + +```shell +pip install -e . +``` + +## Usage + +> [!NOTE] +> All Deephaven functionality requires a running server. Start the server before importing Deephaven modules. 
+ +Import and use the library functions in your Python code: + +```python +# Start the Deephaven server +from deephaven_server import Server +server = Server(port=10000, jvm_args=["-Xmx4g"]) +server.start() + +# Now use the library functions +from my_dh_library.queries import filter_by_threshold, add_computed_columns +from deephaven import read_csv + +data = read_csv("data.csv") +filtered = filter_by_threshold(data, "Score", 75.0) +enhanced = add_computed_columns(filtered) +``` + +## Available Functions + +### Query Functions (`my_dh_library.queries`) + +- `filter_by_threshold(table, column, threshold)` - Filter table rows where column value exceeds threshold +- `add_computed_columns(table)` - Add commonly used computed columns to a table +- `summarize_by_group(table, group_col, value_col)` - Create summary statistics grouped by a column + +### Utility Functions (`my_dh_library.utils`) + +- `validate_columns(table, required_columns)` - Check if table has all required columns +- `get_table_info(table)` - Get basic information about a table + +## Requirements + +- Python 3.8 or later +- Deephaven Server 0.35.0 or later diff --git a/my_dh_library/pyproject.toml b/my_dh_library/pyproject.toml new file mode 100644 index 0000000..3fa6b90 --- /dev/null +++ b/my_dh_library/pyproject.toml @@ -0,0 +1,16 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "my_dh_library" +version = "0.1.0" +description = "Reusable Deephaven query functions" +readme = "README.md" +requires-python = ">=3.8" +dependencies = [ + "deephaven-server>=0.35.0", +] + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/my_dh_library/src/my_dh_library.egg-info/PKG-INFO b/my_dh_library/src/my_dh_library.egg-info/PKG-INFO new file mode 100644 index 0000000..959f753 --- /dev/null +++ b/my_dh_library/src/my_dh_library.egg-info/PKG-INFO @@ -0,0 +1,55 @@ +Metadata-Version: 2.4 +Name: my_dh_library +Version: 0.1.0 +Summary: 
Reusable Deephaven query functions +Requires-Python: >=3.8 +Description-Content-Type: text/markdown +Requires-Dist: deephaven-server>=0.35.0 + +# My Deephaven Library + +A library-only package providing reusable Deephaven query functions. This package contains no CLI tools - it's designed to be imported and used as a library in other Python projects. + +## Installation + +```shell +pip install . +``` + +Or in editable mode for development: + +```shell +pip install -e . +``` + +## Usage + +Import and use the library functions in your Python code: + +```python +from my_dh_library.queries import filter_by_threshold, add_computed_columns +from deephaven import read_csv + +# Use the library functions +data = read_csv("data.csv") +filtered = filter_by_threshold(data, "Score", 75.0) +enhanced = add_computed_columns(filtered) +``` + +## Available Functions + +### Query Functions (`my_dh_library.queries`) + +- `filter_by_threshold(table, column, threshold)` - Filter table rows where column value exceeds threshold +- `add_computed_columns(table)` - Add commonly used computed columns to a table +- `summarize_by_group(table, group_col, value_col)` - Create summary statistics grouped by a column + +### Utility Functions (`my_dh_library.utils`) + +- `validate_columns(table, required_columns)` - Check if table has all required columns +- `get_table_info(table)` - Get basic information about a table + +## Requirements + +- Python 3.8 or later +- Deephaven Server 0.35.0 or later diff --git a/my_dh_library/src/my_dh_library.egg-info/SOURCES.txt b/my_dh_library/src/my_dh_library.egg-info/SOURCES.txt new file mode 100644 index 0000000..29745ee --- /dev/null +++ b/my_dh_library/src/my_dh_library.egg-info/SOURCES.txt @@ -0,0 +1,10 @@ +README.md +pyproject.toml +src/my_dh_library/__init__.py +src/my_dh_library/queries.py +src/my_dh_library/utils.py +src/my_dh_library.egg-info/PKG-INFO +src/my_dh_library.egg-info/SOURCES.txt +src/my_dh_library.egg-info/dependency_links.txt
+src/my_dh_library.egg-info/requires.txt +src/my_dh_library.egg-info/top_level.txt \ No newline at end of file diff --git a/my_dh_library/src/my_dh_library.egg-info/dependency_links.txt b/my_dh_library/src/my_dh_library.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/my_dh_library/src/my_dh_library.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/my_dh_library/src/my_dh_library.egg-info/requires.txt b/my_dh_library/src/my_dh_library.egg-info/requires.txt new file mode 100644 index 0000000..b68ceb0 --- /dev/null +++ b/my_dh_library/src/my_dh_library.egg-info/requires.txt @@ -0,0 +1 @@ +deephaven-server>=0.35.0 diff --git a/my_dh_library/src/my_dh_library.egg-info/top_level.txt b/my_dh_library/src/my_dh_library.egg-info/top_level.txt new file mode 100644 index 0000000..e663cac --- /dev/null +++ b/my_dh_library/src/my_dh_library.egg-info/top_level.txt @@ -0,0 +1 @@ +my_dh_library diff --git a/my_dh_library/src/my_dh_library/__init__.py b/my_dh_library/src/my_dh_library/__init__.py new file mode 100644 index 0000000..6e191c0 --- /dev/null +++ b/my_dh_library/src/my_dh_library/__init__.py @@ -0,0 +1,7 @@ +"""My Deephaven package for data processing.""" + +__version__ = "0.1.0" + +from my_dh_library.queries import filter_by_threshold, add_computed_columns, summarize_by_group + +__all__ = ["filter_by_threshold", "add_computed_columns", "summarize_by_group"] diff --git a/my_dh_library/src/my_dh_library/__pycache__/__init__.cpython-312.pyc b/my_dh_library/src/my_dh_library/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b37569a3e1e4e05ff48dffac6d0a09febdba9acd GIT binary patch literal 427 zcmZ8cu};G<6m*(|qE=-9DlujxLzA*2CRSi#Aj^{*o5o9bn5Bir literal 0 HcmV?d00001 diff --git a/my_dh_library/src/my_dh_library/__pycache__/queries.cpython-312.pyc b/my_dh_library/src/my_dh_library/__pycache__/queries.cpython-312.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..ed1febe3b02bfeabb273101e9b1942c20bb341de GIT binary patch literal 1355 zcmZ{jJ&fE$6vt=mwfCC~u2btj@7BzYflQMA0sym}()8v|SA&sxytYm`)AWq9)V!I<$SKwQHzz z8MkN$VpGLk$m_wU$?L;;yBJu*^8GJZF5Gd*$W_MDiM!8Y@?Fk&LH6?4%Yq~p_Fqzm zg)8H_OstGd8PiL`Jc_GsHsMT6l8{y=XIai;4<52y26!@_+pyh72k0hV-YuO2`~aSI zpglkj^~cBe*N~-^AAKBz8RJBiBs{q#$gK(Ej5Ni`z8mI@uZz?Bwk0 z*=L8RH-7DJ9=0~;aF4tfNy!H&j_csOilZD4UweU|UfP!_1zJ&*#9={l!BmvynQ~+Y zfm=e{2C-e9zMACFJV2)ID{`VqFu-sOECLU-R^FITQ4d7MGyNxNhP>RiZwTsOuWP$lvSLSJBo*DnhL#5zfRJUG( literal 0 HcmV?d00001 diff --git a/my_dh_library/src/my_dh_library/queries.py b/my_dh_library/src/my_dh_library/queries.py new file mode 100644 index 0000000..605439e --- /dev/null +++ b/my_dh_library/src/my_dh_library/queries.py @@ -0,0 +1,30 @@ +"""Reusable Deephaven query functions.""" + +from deephaven.table import Table + + +def filter_by_threshold(table: Table, column: str, threshold: float) -> Table: + """Filter table rows where column value exceeds threshold.""" + return table.where(f"{column} > {threshold}") + + +def add_computed_columns(table: Table) -> Table: + """Add commonly used computed columns to a table.""" + return table.update( + [ + "DoubleValue = Value * 2", + "IsHigh = Value > 100", + ] + ) + + +def summarize_by_group(table: Table, group_col: str, value_col: str) -> Table: + """Create summary statistics grouped by a column.""" + return table.agg_by( + [ + f"Sum = sum({value_col})", + f"Avg = avg({value_col})", + f"Count = count()", + ], + by=[group_col], + ) diff --git a/my_dh_library/src/my_dh_library/utils.py b/my_dh_library/src/my_dh_library/utils.py new file mode 100644 index 0000000..b8d2a88 --- /dev/null +++ b/my_dh_library/src/my_dh_library/utils.py @@ -0,0 +1,18 @@ +"""Utility functions for working with Deephaven tables.""" + +from deephaven.table import Table + + +def validate_columns(table: Table, required_columns: list[str]) -> bool: + """Check if table has all required columns.""" + table_columns = [col.name 
for col in table.columns] + return all(col in table_columns for col in required_columns) + + +def get_table_info(table: Table) -> dict: + """Get basic information about a table.""" + return { + "num_rows": table.size, + "num_columns": len(table.columns), + "columns": [col.name for col in table.columns], + } diff --git a/my_dh_toolkit/README.md b/my_dh_toolkit/README.md new file mode 100644 index 0000000..ee3f6ea --- /dev/null +++ b/my_dh_toolkit/README.md @@ -0,0 +1,104 @@ +# My Deephaven Toolkit + +A combined package providing both reusable library code and command-line functions for Deephaven. This package can be used as both a library (imported in Python code) and as CLI functions (called within a Python session). + +## Installation + +```shell +pip install . +``` + +Or in editable mode for development: + +```shell +pip install -e . +``` + +## Usage as a Library + +> [!NOTE] +> All Deephaven functionality requires a running server. Start the server before importing Deephaven modules. + +Import and use the library functions in your Python code: + +```python +# Start the Deephaven server +from deephaven_server import Server +server = Server(port=10000, jvm_args=["-Xmx4g"]) +server.start() + +# Now use the library functions +from my_dh_toolkit.queries import filter_by_threshold, add_computed_columns +from my_dh_toolkit import my_dh_query, batch_process +from deephaven import read_csv + +data = read_csv("data.csv") +filtered = filter_by_threshold(data, "Score", 75.0) + +# Or use the exported functions +result = my_dh_query("data.csv", verbose=True) +``` + +## Usage as CLI Functions + +> [!NOTE] +> CLI functions require a Deephaven server running in the same Python process. Use them within a Python session where the server is already started. 
+ +```python +# Start the Deephaven server +from deephaven_server import Server +server = Server(port=10000, jvm_args=["-Xmx4g"]) +server.start() + +# Use the CLI functions +from my_dh_toolkit import my_dh_query, batch_process +result = my_dh_query("../data/sample.csv", verbose=True) +batch_process("../data/batch/", "./output", verbose=True) +``` + +## Commands + +### my-dh-query + +Process a single CSV file with Deephaven. + +**Arguments:** +- `input_file` - Path to the CSV file to process + +**Options:** +- `--verbose, -v` - Enable verbose output + +### my-dh-process + +Batch process multiple CSV files from a directory. + +**Arguments:** +- `directory` - Directory containing CSV files to process + +**Options:** +- `--output, -o` - Output directory (default: ./output) +- `--verbose, -v` - Enable verbose output + +## Available Functions + +### Query Functions (`my_dh_toolkit.queries`) + +- `filter_by_threshold(table, column, threshold)` - Filter table rows where column value exceeds threshold +- `add_computed_columns(table)` - Add commonly used computed columns to a table +- `summarize_by_group(table, group_col, value_col)` - Create summary statistics grouped by a column + +### Utility Functions (`my_dh_toolkit.utils`) + +- `validate_columns(table, required_columns)` - Check if table has all required columns +- `get_table_info(table)` - Get basic information about a table + +### Exported Functions (`my_dh_toolkit`) + +- `my_dh_query(input_file, verbose)` - Read and process a CSV file +- `batch_process(directory, output_dir, verbose)` - Process multiple CSV files + +## Requirements + +- Python 3.8 or later +- Deephaven Server 0.35.0 or later +- Click 8.0.0 or later diff --git a/my_dh_toolkit/pyproject.toml b/my_dh_toolkit/pyproject.toml new file mode 100644 index 0000000..81bc624 --- /dev/null +++ b/my_dh_toolkit/pyproject.toml @@ -0,0 +1,21 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = 
"my_dh_toolkit" +version = "0.1.0" +description = "Deephaven library and CLI tools" +readme = "README.md" +requires-python = ">=3.8" +dependencies = [ + "deephaven-server>=0.35.0", + "click>=8.0.0", +] + +[project.scripts] +my-dh-query = "my_dh_toolkit.cli:app" +my-dh-process = "my_dh_toolkit.processor:process" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/my_dh_toolkit/src/my_dh_toolkit/__init__.py b/my_dh_toolkit/src/my_dh_toolkit/__init__.py new file mode 100644 index 0000000..31bf609 --- /dev/null +++ b/my_dh_toolkit/src/my_dh_toolkit/__init__.py @@ -0,0 +1,8 @@ +"""My Deephaven package for data processing.""" + +__version__ = "0.1.0" + +from my_dh_toolkit.cli import my_dh_query +from my_dh_toolkit.processor import batch_process + +__all__ = ["my_dh_query", "batch_process"] diff --git a/my_dh_toolkit/src/my_dh_toolkit/__main__.py b/my_dh_toolkit/src/my_dh_toolkit/__main__.py new file mode 100644 index 0000000..c7407a9 --- /dev/null +++ b/my_dh_toolkit/src/my_dh_toolkit/__main__.py @@ -0,0 +1,4 @@ +from my_dh_toolkit.cli import app + +if __name__ == "__main__": + app() diff --git a/my_dh_toolkit/src/my_dh_toolkit/cli.py b/my_dh_toolkit/src/my_dh_toolkit/cli.py new file mode 100644 index 0000000..7ffd645 --- /dev/null +++ b/my_dh_toolkit/src/my_dh_toolkit/cli.py @@ -0,0 +1,31 @@ +import click + + +def my_dh_query(input_file: str, verbose: bool = False): + """Read a CSV file and perform a simple query operation on the data.""" + from deephaven import read_csv + + if verbose: + click.echo(f"Processing {input_file}...") + + source = read_csv(input_file) + + result = source.update(formulas=["DoubleScore = Score * 2"]) + + if verbose: + click.echo(f"Processed {result.size} rows") + + return result + + +@click.command() +@click.argument("input_file", type=click.Path(exists=True)) +@click.option("--verbose", "-v", is_flag=True, help="Enable verbose output") +def app(input_file: str, verbose: bool) -> None: + """Process data with Deephaven.""" + result 
= my_dh_query(input_file, verbose) + click.echo("Processing complete!") + + +if __name__ == "__main__": + app() diff --git a/my_dh_toolkit/src/my_dh_toolkit/processor.py b/my_dh_toolkit/src/my_dh_toolkit/processor.py new file mode 100644 index 0000000..009b099 --- /dev/null +++ b/my_dh_toolkit/src/my_dh_toolkit/processor.py @@ -0,0 +1,40 @@ +import click +from pathlib import Path + + +def batch_process(directory: str, output_dir: str, verbose: bool = False) -> None: + """Process multiple CSV files from a directory.""" + from deephaven import read_csv + + input_path = Path(directory) + output_path = Path(output_dir) + output_path.mkdir(exist_ok=True) + + csv_files = list(input_path.glob("*.csv")) + + if verbose: + click.echo(f"Found {len(csv_files)} CSV files to process") + + for csv_file in csv_files: + if verbose: + click.echo(f"Processing {csv_file.name}...") + + table = read_csv(str(csv_file)) + processed = table.update(formulas=["ProcessedScore = Score * 2"]) + + if verbose: + click.echo(f" Processed {processed.size} rows") + + +@click.command() +@click.argument("directory", type=click.Path(exists=True, file_okay=False)) +@click.option("--output", "-o", default="./output", help="Output directory") +@click.option("--verbose", "-v", is_flag=True, help="Enable verbose output") +def process(directory: str, output: str, verbose: bool) -> None: + """Batch process CSV files with Deephaven.""" + batch_process(directory, output, verbose) + click.echo("Batch processing complete!") + + +if __name__ == "__main__": + process() diff --git a/my_dh_toolkit/src/my_dh_toolkit/queries.py b/my_dh_toolkit/src/my_dh_toolkit/queries.py new file mode 100644 index 0000000..605439e --- /dev/null +++ b/my_dh_toolkit/src/my_dh_toolkit/queries.py @@ -0,0 +1,30 @@ +"""Reusable Deephaven query functions.""" + +from deephaven.table import Table + + +def filter_by_threshold(table: Table, column: str, threshold: float) -> Table: + """Filter table rows where column value exceeds threshold.""" + 
return table.where(f"{column} > {threshold}") + + +def add_computed_columns(table: Table) -> Table: + """Add commonly used computed columns to a table.""" + return table.update( + [ + "DoubleValue = Value * 2", + "IsHigh = Value > 100", + ] + ) + + +def summarize_by_group(table: Table, group_col: str, value_col: str) -> Table: + """Create summary statistics grouped by a column.""" + return table.agg_by( + [ + f"Sum = sum({value_col})", + f"Avg = avg({value_col})", + f"Count = count()", + ], + by=[group_col], + ) diff --git a/my_dh_toolkit/src/my_dh_toolkit/utils.py b/my_dh_toolkit/src/my_dh_toolkit/utils.py new file mode 100644 index 0000000..b8d2a88 --- /dev/null +++ b/my_dh_toolkit/src/my_dh_toolkit/utils.py @@ -0,0 +1,18 @@ +"""Utility functions for working with Deephaven tables.""" + +from deephaven.table import Table + + +def validate_columns(table: Table, required_columns: list[str]) -> bool: + """Check if table has all required columns.""" + table_columns = [col.name for col in table.columns] + return all(col in table_columns for col in required_columns) + + +def get_table_info(table: Table) -> dict: + """Get basic information about a table.""" + return { + "num_rows": table.size, + "num_columns": len(table.columns), + "columns": [col.name for col in table.columns], + }