Merge pull request #39 from eclipsevortex/release/2.2.3
Release/2.2.3
eclipsevortex authored May 7, 2024
2 parents b6fb3d4 + 22ea24a commit dfe4ca1
Showing 46 changed files with 3,131 additions and 183 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,16 @@
# Changelog

## 2.2.3 / 2024-05-06

## What's Changed
* Release/2.2.2 by @eclipsevortex in https://github.com/eclipsevortex/SubVortex/pull/36
* Add unit tests for resync miners by @eclipsevortex in https://github.com/eclipsevortex/SubVortex/pull/38
* implement auto upgrade by @eclipsevortex in https://github.com/eclipsevortex/SubVortex/pull/40
* isolate wandb by @eclipsevortex in https://github.com/eclipsevortex/SubVortex/pull/41


**Full Changelog**: https://github.com/eclipsevortex/SubVortex/compare/v2.2.2...v2.2.3

## 2.2.2 / 2024-04-25

**Full Changelog**: https://github.com/eclipsevortex/SubVortex/compare/v2.2.1...v2.2.2
8 changes: 6 additions & 2 deletions README.md
@@ -345,7 +345,8 @@ pm2 start neurons/miner.py \
--subtensor.network local \
--wallet.name YOUR_WALLET_NAME \
--wallet.hotkey YOUR_HOTKEY_NAME \
--logging.debug
--logging.debug \
--auto-update
```

> IMPORTANT: Do not run more than one miner per machine. Running multiple miners will result in the loss of incentive and emissions on all miners.
@@ -367,13 +368,16 @@ pm2 start neurons/validator.py \
--netuid <SUBNET_UID> \
--wallet.name YOUR_WALLET_NAME \
--wallet.hotkey YOUR_HOTKEY_NAME \
--logging.debug
--logging.debug \
--auto-update
```

> NOTE: if you run a validator on testnet, do not forget to add the argument `--subtensor.network test` or `--subtensor.chain_endpoint ws://<LOCAL_SUBTENSOR_IP>:9944` (the local subtensor has to target the testnet network)
> NOTE: to access the wandb UI and get statistics about the miners, click this [link](https://wandb.ai/eclipsevortext/subvortex-team) and choose the validator run you want.
> NOTE: by default, the dumps created by the auto-update process are stored in `/etc/redis`. If you want to change the location, use `--database.redis_dump_path`.
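
For example, to keep those dumps somewhere else, append the flag to the validator start command shown above (the path below is only an illustration):

```
pm2 start neurons/validator.py \
  --netuid <SUBNET_UID> \
  --wallet.name YOUR_WALLET_NAME \
  --wallet.hotkey YOUR_HOTKEY_NAME \
  --logging.debug \
  --auto-update \
  --database.redis_dump_path /var/lib/redis/dumps
```
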
## Releases

- [Release-2.2.0](./scripts/release/release-2.2.0/RELEASE-2.2.0.md)
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
2.2.2
2.2.3
3 changes: 3 additions & 0 deletions neurons/miner.py
@@ -86,6 +86,9 @@ def __init__(self):
bt.logging(config=self.config, logging_dir=self.config.miner.full_path)
bt.logging.info(f"{self.config}")

# Show miner version
bt.logging.debug(f"miner version {THIS_VERSION}")

# Init device.
bt.logging.debug("loading device")
self.device = torch.device(self.config.miner.device)
36 changes: 28 additions & 8 deletions neurons/validator.py
@@ -25,10 +25,12 @@
from typing import List
from traceback import print_exception

from subnet import __version__ as THIS_VERSION

from subnet.monitor.monitor import Monitor

from subnet.shared.checks import check_registration
from subnet.shared.utils import get_redis_password
from subnet.shared.utils import get_redis_password, should_upgrade
from subnet.shared.subtensor import get_current_block
from subnet.shared.weights import should_set_weights
from subnet.shared.mock import MockMetagraph, MockDendrite, MockSubtensor
@@ -37,13 +39,14 @@
from subnet.validator.localisation import get_country, get_localisation
from subnet.validator.forward import forward
from subnet.validator.models import Miner
from subnet.validator.version import VersionControl
from subnet.validator.miner import get_all_miners
from subnet.validator.state import (
resync_metagraph_and_miners,
load_state,
save_state,
init_wandb,
reinit_wandb,
finish_wandb,
should_reinit_wandb,
)
from subnet.validator.weights import (
@@ -88,6 +91,9 @@ def __init__(self, config=None):
self.check_config(self.config)
bt.logging(config=self.config, logging_dir=self.config.neuron.full_path)

# Show validator version
bt.logging.debug(f"validator version {THIS_VERSION}")

# Init device.
bt.logging.debug("loading device")
self.device = torch.device(self.config.neuron.device)
@@ -181,10 +187,15 @@ def __init__(self, config=None):
self.last_registered_block = 0
self.rebalance_queue = []
self.miners: List[Miner] = []
self.last_upgrade_check = 0

async def run(self):
bt.logging.info("run()")

# Init version control
dump_path = self.config.database.redis_dump_path
self.version_control = VersionControl(self.database, dump_path)

# Init miners
self.miners = await get_all_miners(self)
bt.logging.debug(f"Miners loaded {len(self.miners)}")
@@ -198,6 +209,17 @@ async def run(self):

try:
while 1:
# Check for an upgrade (at most every 10 minutes) and run it if needed
if should_upgrade(self.config.auto_update, self.last_upgrade_check):
bt.logging.debug("Checking upgrade")
must_restart = await self.version_control.upgrade()
if must_restart:
finish_wandb()
self.version_control.restart()
return

self.last_upgrade_check = time.time()

start_epoch = time.time()

await resync_metagraph_and_miners(self)
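
`should_upgrade` is imported from `subnet.shared.utils` and is not part of this diff; a minimal sketch of what it could look like, assuming the 10-minute interval the comment above mentions, is:

```python
import time

# Assumed interval; the real value lives in subnet.shared.utils, not in this diff.
UPGRADE_CHECK_INTERVAL = 600  # seconds (10 minutes)


def should_upgrade(auto_update: bool, last_upgrade_check: float) -> bool:
    """Return True when auto-update is enabled and the last check is older than the interval."""
    return auto_update and (time.time() - last_upgrade_check) >= UPGRADE_CHECK_INTERVAL
```
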
Expand Down Expand Up @@ -259,22 +281,20 @@ async def run_forward():
# Rollover wandb to a new run.
if should_reinit_wandb(self):
bt.logging.info("Reinitializing wandb")
reinit_wandb(self)
finish_wandb()
init_wandb(self)

self.prev_step_block = get_current_block(self.subtensor)
if self.config.neuron.verbose:
bt.logging.debug(f"block at end of step: {self.prev_step_block}")
bt.logging.debug(f"Step took {time.time() - start_epoch} seconds")

self.step += 1

except Exception as err:
bt.logging.error("Error in training loop", str(err))
bt.logging.debug(print_exception(type(err), err, err.__traceback__))

if self.wandb is not None:
self.wandb.finish()
assert self.wandb.run is None
bt.logging.debug("Finishing wandb run")
finish_wandb()

# After all we have to ensure subtensor connection is closed properly
finally:
68 changes: 68 additions & 0 deletions scripts/redis/README.md
@@ -12,6 +12,12 @@ This document explains how to install and uninstall a redis.
- [Uninstallation](#uninstallation)
- [As process](#uninstallation-as-process)
- [As docker container](#uninstallation-as-container)
- [Migration](#migration)
- [Rollout](#migration-rollout)
- [Rollback](#migration-rollback)
- [Dump](#dump)
- [Creation](#dump-creation)
- [Restoration](#dump-restoration)

---

@@ -252,3 +258,65 @@ You should have something similar (or at least a list that does not contain `subv
```
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
```

# Migration

## Rollout <a id="migration-rollout"></a>

To roll out any Redis migration manually, you can use the Python script `redis_migration.py`.

For example, if you want to roll out version 2.2.1, you can run the following from the `SubVortex` directory

```
python3 ./scripts/redis/utils/redis_migration.py --run-type rollout --version 2.2.1
```

> IMPORTANT <br />
> If you have to roll out multiple versions, execute them one by one, from your current version up to the targeted one.
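
For instance, assuming a migration script exists for each intermediate version, upgrading stored data from 2.0.0 to 2.2.1 would be two consecutive rollouts:

```
python3 ./scripts/redis/utils/redis_migration.py --run-type rollout --version 2.2.0
python3 ./scripts/redis/utils/redis_migration.py --run-type rollout --version 2.2.1
```
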
## Rollback <a id="migration-rollback"></a>

To roll back any Redis migration manually, you can use the Python script `redis_migration.py`.

For example, if you want to roll back version 2.2.1, you can run the following from the `SubVortex` directory

```
python3 ./scripts/redis/utils/redis_migration.py --run-type rollback --version 2.2.1
```

> IMPORTANT <br />
> If you have to roll back multiple versions, execute them one by one, from your current version down to the targeted one.

# Dump

## Creation <a id="dump-creation"></a>

To create a Redis dump manually, you can use the Python script `redis_dump.py`.

For example, if you want to create the dump in the `SubVortex` directory, you can run

```
python3 ./scripts/redis/utils/redis_dump.py --run-type create --dump-path redis-dump-2.0.0.json
```

If you want to create the dump in another location and/or with a different name, use the `--dump-path` argument

```
python3 ./scripts/redis/utils/redis_dump.py --run-type create --dump-path /tmp/redis/redis-dump-2.0.0.json
```

## Restoration <a id="dump-restoration"></a>

To restore a Redis dump manually, you can use the Python script `redis_dump.py`.

For example, if you want to restore a dump located in the `SubVortex` directory, you can run

```
python3 ./scripts/redis/utils/redis_dump.py --run-type restore --dump-path redis-dump-2.0.0.json
```

If you want to restore a dump from another location, use the `--dump-path` argument

```
python3 ./scripts/redis/utils/redis_dump.py --run-type restore --dump-path /tmp/redis/redis-dump-2.0.0.json
```
43 changes: 43 additions & 0 deletions scripts/redis/migrations/migration-2.2.0.py
@@ -0,0 +1,43 @@
from redis import asyncio as aioredis

current = "2.0.0"


async def rollout(database: aioredis.Redis):
    async for key in database.scan_iter("stats:*"):
        metadata_dict = await database.hgetall(key)

        if b"subtensor_successes" not in metadata_dict:
            await database.hset(key, b"subtensor_successes", 0)
        if b"subtensor_attempts" not in metadata_dict:
            await database.hset(key, b"subtensor_attempts", 0)
        if b"metric_successes" not in metadata_dict:
            await database.hset(key, b"metric_successes", 0)
        if b"metric_attempts" not in metadata_dict:
            await database.hset(key, b"metric_attempts", 0)
        if b"total_successes" not in metadata_dict:
            await database.hset(key, b"total_successes", 0)
        if b"tier" not in metadata_dict:
            await database.hset(key, b"tier", "Bronze")

    await database.set("version", current)


async def rollback(database: aioredis.Redis):
    async for key in database.scan_iter("stats:*"):
        metadata_dict = await database.hgetall(key)

        if b"subtensor_successes" in metadata_dict:
            await database.hdel(key, b"subtensor_successes")
        if b"subtensor_attempts" in metadata_dict:
            await database.hdel(key, b"subtensor_attempts")
        if b"metric_successes" in metadata_dict:
            await database.hdel(key, b"metric_successes")
        if b"metric_attempts" in metadata_dict:
            await database.hdel(key, b"metric_attempts")
        if b"total_successes" in metadata_dict:
            await database.hdel(key, b"total_successes")
        if b"tier" in metadata_dict:
            await database.hdel(key, b"tier")

    # set() cannot store None; drop the version marker on rollback instead
    await database.delete("version")
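
A migration module like this one is normally driven by `scripts/redis/utils/redis_migration.py`. For a quick local test, the `rollout` coroutine above can also be called directly; the snippet below is only a sketch and the connection settings are illustrative:

```python
import asyncio

from redis import asyncio as aioredis


async def main():
    # Illustrative connection settings; adjust host/port/db to your setup.
    database = aioredis.StrictRedis(host="localhost", port=6379, db=1)
    await rollout(database)


asyncio.run(main())
```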
98 changes: 98 additions & 0 deletions scripts/redis/utils/redis_dump.py
@@ -0,0 +1,98 @@
import asyncio
import argparse
import bittensor as bt
from redis import asyncio as aioredis

from subnet.shared.utils import get_redis_password
from subnet.validator.database import create_dump, restore_dump


async def create(args):
    try:
        bt.logging.info(
            f"Loading database from {args.database_host}:{args.database_port}"
        )
        redis_password = get_redis_password(args.redis_password)
        database = aioredis.StrictRedis(
            host=args.database_host,
            port=args.database_port,
            db=args.database_index,
            password=redis_password,
        )

        bt.logging.info("Create dump starting")

        await create_dump(args.dump_path, database)

        bt.logging.success("Create dump successful")
    except Exception as e:
        bt.logging.error(f"Error during dump creation: {e}")


async def restore(args):
    try:
        bt.logging.info(
            f"Loading database from {args.database_host}:{args.database_port}"
        )
        redis_password = get_redis_password(args.redis_password)
        database = aioredis.StrictRedis(
            host=args.database_host,
            port=args.database_port,
            db=args.database_index,
            password=redis_password,
        )

        bt.logging.info("Restore dump starting")

        await restore_dump(args.dump_path, database)

        bt.logging.success("Restore dump successful")

    except Exception as e:
        bt.logging.error(f"Error during dump restoration: {e}")


async def main(args):
    if args.run_type == "create":
        await create(args)
    else:
        await restore(args)


if __name__ == "__main__":
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "--run-type",
            type=str,
            default="create",
            help="Type of operation to execute. Possible values are create or restore",
        )
        parser.add_argument(
            "--dump-path",
            type=str,
            default="/tmp/redis",
            help="Dump file (with path) to create or restore",
        )
        parser.add_argument(
            "--redis_password",
            type=str,
            default=None,
            help="password for the redis database",
        )
        parser.add_argument(
            "--redis_conf_path",
            type=str,
            default="/etc/redis/redis.conf",
            help="path to the redis configuration file",
        )
        parser.add_argument("--database_host", type=str, default="localhost")
        parser.add_argument("--database_port", type=int, default=6379)
        parser.add_argument("--database_index", type=int, default=1)
        args = parser.parse_args()

        asyncio.run(main(args))
    except KeyboardInterrupt:
        print("KeyboardInterrupt")
    except ValueError as e:
        print(f"ValueError: {e}")