2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
15 #include <linux/compiler.h>
16 #include <linux/err.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/module.h>
22 #include <linux/slab.h>
23 #include <linux/timer.h>
24 #include <asm/lowcore.h>
25 #include <asm/pgtable.h>
/*
 * Expands to two comma-separated initializer values: the byte offset of
 * member stat.x inside struct kvm_vcpu, followed by KVM_STAT_VCPU.
 */
30 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Table of per-vcpu statistics: each entry pairs a human-readable name with
 * the matching counter in struct kvm_vcpu's stat member (via VCPU_STAT).
 * Note that a few names differ from the counter they reference (e.g.
 * "userspace_handled" -> exit_userspace).
 * NOTE(review): presumably consumed by common KVM code to create debugfs
 * files; the table terminator is not visible in this excerpt - confirm.
 */
32 struct kvm_stats_debugfs_item debugfs_entries[] = {
33 { "userspace_handled", VCPU_STAT(exit_userspace) },
34 { "exit_null", VCPU_STAT(exit_null) },
35 { "exit_validity", VCPU_STAT(exit_validity) },
36 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
37 { "exit_external_request", VCPU_STAT(exit_external_request) },
38 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
39 { "exit_instruction", VCPU_STAT(exit_instruction) },
40 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
41 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
42 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
43 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
44 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
45 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
46 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
47 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
48 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
49 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
50 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
51 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
52 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
53 { "instruction_spx", VCPU_STAT(instruction_spx) },
54 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
55 { "instruction_stap", VCPU_STAT(instruction_stap) },
56 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
57 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
58 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
59 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
60 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
61 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
62 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
63 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
64 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
65 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
66 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
67 { "diagnose_44", VCPU_STAT(diagnose_44) },
72 /* Section: not file related */
73 void kvm_arch_hardware_enable(void *garbage)
75 /* every s390 is virtualization enabled ;-) */
78 void kvm_arch_hardware_disable(void *garbage)
82 int kvm_arch_hardware_setup(void)
87 void kvm_arch_hardware_unsetup(void)
91 void kvm_arch_check_processor_compat(void *rtn)
95 int kvm_arch_init(void *opaque)
100 void kvm_arch_exit(void)
104 /* Section: device related */
/*
 * Architecture hook for ioctls issued on the KVM device file.
 * The visible branch forwards KVM_S390_ENABLE_SIE to s390_enable_sie() and
 * returns its result; the return value for any other ioctl number is not
 * visible in this excerpt.
 */
105 long kvm_arch_dev_ioctl(struct file *filp,
106 unsigned int ioctl, unsigned long arg)
108 if (ioctl == KVM_S390_ENABLE_SIE)
109 return s390_enable_sie();
113 int kvm_dev_ioctl_check_extension(long ext)
121 /* Section: vm related */
123 * Get (and clear) the dirty memory log for a memory slot.
125 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
126 struct kvm_dirty_log *log)
/*
 * Architecture hook for ioctls issued on a VM file descriptor.
 * filp->private_data is the struct kvm; arg is treated as a user pointer.
 * For KVM_S390_INTERRUPT a struct kvm_s390_interrupt is copied in from user
 * space and handed to kvm_s390_inject_vm(), whose result is stored in r.
 * The copy-failure path and the handling of other ioctls are not visible
 * in this excerpt.
 */
131 long kvm_arch_vm_ioctl(struct file *filp,
132 unsigned int ioctl, unsigned long arg)
134 struct kvm *kvm = filp->private_data;
135 void __user *argp = (void __user *)arg;
139 case KVM_S390_INTERRUPT: {
140 struct kvm_s390_interrupt s390int;
143 if (copy_from_user(&s390int, argp, sizeof(s390int)))
145 r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Allocate and initialise the struct kvm for a new virtual machine.
 * Visible steps, in order: enable SIE, allocate a zeroed struct kvm,
 * allocate one zeroed page for the SCA block, register a debug feature
 * named "kvm-<pid of the creating task>", initialise the floating
 * interrupt lock and list, attach the sprintf debug view, log the creation.
 * The error checks between these steps are not visible in this excerpt.
 */
155 struct kvm *kvm_arch_create_vm(void)
161 rc = s390_enable_sie();
166 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
170 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
/* debug feature name is derived from the pid of the calling task */
174 sprintf(debug_name, "kvm-%u", current->pid);
176 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
180 spin_lock_init(&kvm->arch.float_int.lock);
181 INIT_LIST_HEAD(&kvm->arch.float_int.list);
183 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
184 VM_EVENT(kvm, 3, "%s", "vm created");
/* NOTE(review): presumably part of the error-unwind path - confirm */
188 free_page((unsigned long)(kvm->arch.sca));
/*
 * Tear down a vcpu: log the event, free the page holding its SIE control
 * block, then call kvm_vcpu_uninit(). Whether the vcpu structure itself is
 * freed is not visible in this excerpt.
 */
195 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
197 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
198 free_page((unsigned long)(vcpu->arch.sie_block));
199 kvm_vcpu_uninit(vcpu);
/*
 * Walk all KVM_MAX_VCPUS slots of kvm->vcpus[], destroy the vcpu in the
 * slot and reset the slot to NULL.
 * NOTE(review): a guard for empty slots is not visible in this excerpt -
 * confirm one exists before kvm_arch_vcpu_destroy() is called.
 */
203 static void kvm_free_vcpus(struct kvm *kvm)
207 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
209 kvm_arch_vcpu_destroy(kvm->vcpus[i]);
210 kvm->vcpus[i] = NULL;
215 void kvm_arch_sync_events(struct kvm *kvm)
/*
 * Release VM-wide resources: guest memory bookkeeping (kvm_free_physmem),
 * the SCA page and the per-VM debug feature. Further steps (e.g. freeing
 * vcpus or the struct kvm) are not visible in this excerpt.
 */
219 void kvm_arch_destroy_vm(struct kvm *kvm)
222 kvm_free_physmem(kvm);
223 free_page((unsigned long)(kvm->arch.sca));
224 debug_unregister(kvm->arch.dbf);
228 /* Section: vcpu related */
229 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
234 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
/*
 * Switch floating point and access registers from host to guest context:
 * the current (host) values are saved into vcpu->arch.host_*, then the
 * guest values are loaded. Mirror image of kvm_arch_vcpu_put().
 */
239 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
241 save_fp_regs(&vcpu->arch.host_fpregs);
242 save_access_regs(vcpu->arch.host_acrs);
/* strip bits outside FPC_VALID_MASK before loading the guest fpc */
243 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
244 restore_fp_regs(&vcpu->arch.guest_fpregs);
245 restore_access_regs(vcpu->arch.guest_acrs);
/*
 * Switch floating point and access registers from guest back to host
 * context: the current (guest) values are saved into vcpu->arch.guest_*,
 * then the host values saved by kvm_arch_vcpu_load() are restored.
 */
248 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
250 save_fp_regs(&vcpu->arch.guest_fpregs);
251 save_access_regs(vcpu->arch.guest_acrs);
252 restore_fp_regs(&vcpu->arch.host_fpregs);
253 restore_access_regs(vcpu->arch.host_acrs);
/*
 * Put the vcpu's architected state into its initial-reset values: PSW,
 * prefix, CPU timer, clock comparator, TOD programmable register, control
 * registers and floating point control register are cleared, after which
 * a few fields receive their non-zero reset values.
 */
256 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
258 /* this equals the initial cpu reset in the PoP (Principles of Operation), but we don't switch to ESA */
259 vcpu->arch.sie_block->gpsw.mask = 0UL;
260 vcpu->arch.sie_block->gpsw.addr = 0UL;
261 vcpu->arch.sie_block->prefix = 0UL;
262 vcpu->arch.sie_block->ihcpu = 0xffff;
263 vcpu->arch.sie_block->cputm = 0UL;
264 vcpu->arch.sie_block->ckc = 0UL;
265 vcpu->arch.sie_block->todpr = 0;
/* clear all 16 guest control registers, then set the two non-zero ones */
266 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
267 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
268 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
269 vcpu->arch.guest_fpregs.fpc = 0;
/* load the cleared fpc value via the lfpc instruction */
270 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
271 vcpu->arch.sie_block->gbea = 1;
274 /* The current code can have up to 256 pages for virtio */
/* 256 pages of 4096 bytes each, i.e. 1 MiB, expressed in bytes */
275 #define VIRTIODESCSPACE (256ul * 4096ul)
/*
 * Fill in the static parts of the vcpu's SIE control block and per-vcpu
 * state: CPU flags, guest memory window, execution control fields, the
 * clock comparator timer and the CPU id reported to the guest.
 * The return statement is not visible in this excerpt.
 */
277 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
279 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
/*
 * gmsor is the guest origin; gmslm is origin + memory size plus the
 * virtio descriptor space, minus one (i.e. an inclusive upper bound).
 */
280 vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
281 vcpu->kvm->arch.guest_origin +
282 VIRTIODESCSPACE - 1ul;
283 vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
284 vcpu->arch.sie_block->ecb = 2;
285 vcpu->arch.sie_block->eca = 0xC1002001U;
/* timer callback kvm_s390_idle_wakeup() receives the vcpu as its argument */
286 setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
287 (unsigned long) vcpu);
/* start from the host CPU id, then override the version field */
288 get_cpu_id(&vcpu->arch.cpu_id);
289 vcpu->arch.cpu_id.version = 0xff;
/*
 * Allocate a vcpu together with its SIE control block (one zeroed page),
 * register the SIE block in slot `id` of the VM's SCA, wire the vcpu's
 * local interrupt state into the VM's floating interrupt structure and
 * finish with kvm_vcpu_init().
 * The error-handling paths are not visible in this excerpt.
 * NOTE(review): `id` indexes sca->cpu[] and float_int.local_int[]; no range
 * check is visible here - confirm one exists.
 */
293 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
296 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
302 vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
303 get_zeroed_page(GFP_KERNEL);
305 if (!vcpu->arch.sie_block)
308 vcpu->arch.sie_block->icpua = id;
/* the SCA must exist and its slot for this id must still be unused */
309 BUG_ON(!kvm->arch.sca);
310 BUG_ON(kvm->arch.sca->cpu[id].sda);
311 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
/* SCA address is stored split into its upper and lower 32 bits */
312 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
313 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
315 spin_lock_init(&vcpu->arch.local_int.lock);
316 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
317 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
/* publish the local interrupt state under the floating interrupt lock */
318 spin_lock_bh(&kvm->arch.float_int.lock);
319 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
320 init_waitqueue_head(&vcpu->arch.local_int.wq);
321 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
322 spin_unlock_bh(&kvm->arch.float_int.lock);
324 rc = kvm_vcpu_init(vcpu, kvm, id);
327 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
328 vcpu->arch.sie_block);
337 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
339 /* kvm common code refers to this, but never calls it */
/*
 * ioctl wrapper around kvm_s390_vcpu_initial_reset(); any surrounding
 * vcpu load/put calls and the return value are not visible in this excerpt.
 */
344 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
347 kvm_s390_vcpu_initial_reset(vcpu);
/*
 * Set the vcpu's general purpose registers: copies sizeof(regs->gprs)
 * bytes from regs->gprs into vcpu->arch.guest_gprs.
 * The remaining statements of the function are not visible in this excerpt.
 */
352 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
355 memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
/*
 * Read the vcpu's general purpose registers: copies sizeof(regs->gprs)
 * bytes from vcpu->arch.guest_gprs into regs->gprs.
 * The remaining statements of the function are not visible in this excerpt.
 */
360 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
363 memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/*
 * Set the vcpu's special registers from sregs: access registers go to
 * vcpu->arch.guest_acrs, control registers go to the SIE block's gcr.
 */
368 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
369 struct kvm_sregs *sregs)
372 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
373 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * Read the vcpu's special registers into sregs: access registers come from
 * vcpu->arch.guest_acrs, control registers from the SIE block's gcr.
 */
378 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
379 struct kvm_sregs *sregs)
382 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
383 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * Set the guest floating point state from fpu: the register array fprs and
 * the control word fpc are copied into vcpu->arch.guest_fpregs.
 * The fpc value is stored as given; no masking is visible here.
 */
388 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
391 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
392 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
/*
 * Read the guest floating point state into fpu: the register array fprs
 * and the control word fpc are copied out of vcpu->arch.guest_fpregs.
 */
397 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
400 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
401 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/*
 * Store psw as the guest PSW in the SIE block. The CPUSTAT_RUNNING flag is
 * tested first; what happens when it is set is not visible in this excerpt
 * (presumably the request is rejected - confirm).
 */
406 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
411 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
414 vcpu->arch.sie_block->gpsw = psw;
/* Address translation ioctl: unsupported here, always returns -EINVAL. */
419 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
420 struct kvm_translation *tr)
422 return -EINVAL; /* not implemented yet */
/* Guest debug ioctl: unsupported here, always returns -EINVAL. */
425 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
426 struct kvm_guest_debug *dbg)
428 return -EINVAL; /* not implemented yet */
/* Get multiprocessing state ioctl: unsupported here, always returns -EINVAL. */
431 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
432 struct kvm_mp_state *mp_state)
434 return -EINVAL; /* not implemented yet */
/* Set multiprocessing state ioctl: unsupported here, always returns -EINVAL. */
437 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
438 struct kvm_mp_state *mp_state)
440 return -EINVAL; /* not implemented yet */
/*
 * Run the guest once through SIE: sync registers into the SIE block,
 * deliver pending interrupts, clear the intercept code, enter the guest via
 * sie64a() and sync registers back afterwards. A non-zero return from
 * sie64a() is logged and answered by injecting an addressing program
 * interrupt into the guest.
 */
443 static void __vcpu_run(struct kvm_vcpu *vcpu)
/*
 * 16 bytes starting at guest_gprs[14] are mirrored in the SIE block's gg14
 * (presumably gprs 14 and 15 - confirm).
 */
445 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
/* the action taken for a pending machine check is not visible in this excerpt */
450 if (test_thread_flag(TIF_MCCK_PENDING))
453 kvm_s390_deliver_pending_interrupts(vcpu);
455 vcpu->arch.sie_block->icptcode = 0;
459 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
460 atomic_read(&vcpu->arch.sie_block->cpuflags));
461 if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
462 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
463 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
465 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
466 vcpu->arch.sie_block->icptcode);
/* copy the same 16 bytes back from the SIE block after the guest ran */
471 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * Main run loop for a vcpu. Applies the vcpu's signal mask if one is
 * active, marks the CPU as running, takes over the PSW from kvm_run when
 * the previous exit was KVM_EXIT_S390_SIEIC, then repeatedly handles SIE
 * intercepts until a signal is pending or a handler returns non-zero.
 * -ENOTSUPP from the intercept handler means user space must handle the
 * intercept, so kvm_run is filled with the intercept details; -EREMOTE
 * means the handler already prepared kvm_run. The original signal mask is
 * restored before returning and the exit_userspace counter is incremented.
 * Several statements (including the call that runs the guest and the final
 * return) are not visible in this excerpt.
 */
474 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
481 if (vcpu->sigset_active)
482 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
484 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
486 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
/* resume with the PSW user space left in kvm_run after a SIEIC exit */
488 switch (kvm_run->exit_reason) {
489 case KVM_EXIT_S390_SIEIC:
490 vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
491 vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
493 case KVM_EXIT_UNKNOWN:
494 case KVM_EXIT_S390_RESET:
504 rc = kvm_handle_sie_intercept(vcpu);
505 } while (!signal_pending(current) && !rc);
507 if (signal_pending(current) && !rc)
510 if (rc == -ENOTSUPP) {
511 /* intercept cannot be handled in-kernel, prepare kvm-run */
512 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
513 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
514 kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
515 kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
516 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
517 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
521 if (rc == -EREMOTE) {
522 /* intercept was handled, but userspace support is needed
523 * kvm_run has been prepared by the handler */
527 if (vcpu->sigset_active)
528 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
532 vcpu->stat.exit_userspace++;
/*
 * Copy n bytes from kernel buffer `from` to guest address guestdest using
 * either copy_to_guest() or copy_to_guest_absolute() and return that
 * helper's result. The condition choosing between the two is not visible
 * in this excerpt (presumably the `prefix` argument - confirm).
 */
536 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
537 unsigned long n, int prefix)
540 return copy_to_guest(vcpu, guestdest, from, n);
542 return copy_to_guest_absolute(vcpu, guestdest, from, n);
546 * store status at address
547 * we have two special cases:
548 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
549 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * Write the vcpu's architected register state into a save area in guest
 * memory laid out as struct save_area_s390x, starting at addr.
 * For the two special addr values a one-byte architecture mode marker
 * (value 1) is first written to guest address 163 and addr is replaced by
 * SAVE_AREA_BASE; the NOADDR case uses an absolute copy, the PREFIXED case
 * a prefix-relative copy_to_guest(). Each field is then written with
 * __guestcopy() at its offsetof() position.
 * The failure handling after each copy and the assignment of `prefix` are
 * not visible in this excerpt.
 */
551 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
553 const unsigned char archmode = 1;
556 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
557 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
559 addr = SAVE_AREA_BASE;
561 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
562 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
564 addr = SAVE_AREA_BASE;
/* floating point registers, 128 bytes */
569 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
570 vcpu->arch.guest_fpregs.fprs, 128, prefix))
/* general purpose registers, 128 bytes */
573 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
574 vcpu->arch.guest_gprs, 128, prefix))
/* guest PSW, 16 bytes */
577 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
578 &vcpu->arch.sie_block->gpsw, 16, prefix))
/* prefix register, 4 bytes */
581 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
582 &vcpu->arch.sie_block->prefix, 4, prefix))
/* floating point control register, 4 bytes */
585 if (__guestcopy(vcpu,
586 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
587 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
/* TOD programmable register, 4 bytes */
590 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
591 &vcpu->arch.sie_block->todpr, 4, prefix))
/* CPU timer, 8 bytes */
594 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
595 &vcpu->arch.sie_block->cputm, 8, prefix))
/* clock comparator, 8 bytes */
598 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
599 &vcpu->arch.sie_block->ckc, 8, prefix))
/* access registers, 64 bytes */
602 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
603 &vcpu->arch.guest_acrs, 64, prefix))
/* control registers, 128 bytes */
606 if (__guestcopy(vcpu,
607 addr + offsetof(struct save_area_s390x, ctrl_regs),
608 &vcpu->arch.sie_block->gcr, 128, prefix))
/*
 * ioctl-side wrapper around __kvm_s390_vcpu_store_status(); stores its
 * result in rc. Any vcpu load/put around the call and the return statement
 * are not visible in this excerpt.
 */
613 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
618 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
/*
 * Architecture hook for ioctls issued on a vcpu file descriptor.
 * filp->private_data is the vcpu; arg is either used directly as a value
 * (KVM_S390_STORE_STATUS) or as a user pointer (argp).
 *   KVM_S390_INTERRUPT       - copy in a struct kvm_s390_interrupt and
 *                              inject it into this vcpu
 *   KVM_S390_STORE_STATUS    - store status at address arg
 *   KVM_S390_SET_INITIAL_PSW - copy in a PSW and set it as initial PSW
 *   KVM_S390_INITIAL_RESET   - perform the initial CPU reset
 * The copy-failure paths and the default case are not visible in this
 * excerpt.
 */
623 long kvm_arch_vcpu_ioctl(struct file *filp,
624 unsigned int ioctl, unsigned long arg)
626 struct kvm_vcpu *vcpu = filp->private_data;
627 void __user *argp = (void __user *)arg;
630 case KVM_S390_INTERRUPT: {
631 struct kvm_s390_interrupt s390int;
633 if (copy_from_user(&s390int, argp, sizeof(s390int)))
635 return kvm_s390_inject_vcpu(vcpu, &s390int);
637 case KVM_S390_STORE_STATUS:
638 return kvm_s390_vcpu_store_status(vcpu, arg);
639 case KVM_S390_SET_INITIAL_PSW: {
642 if (copy_from_user(&psw, argp, sizeof(psw)))
644 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
646 case KVM_S390_INITIAL_RESET:
647 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
654 /* Section: memory related */
/*
 * Validate and record the single guest memory slot. The visible checks
 * test that guest_phys_addr is zero and that userspace_addr and
 * memory_size are multiples of PAGE_SIZE; the values returned when a check
 * fails are not visible in this excerpt. On success the user space address
 * becomes the guest origin and the size becomes the guest memory size.
 */
655 int kvm_arch_set_memory_region(struct kvm *kvm,
656 struct kvm_userspace_memory_region *mem,
657 struct kvm_memory_slot old,
660 /* A few sanity checks. We can have exactly one memory slot which has
661 to start at guest virtual zero and which has to be located at a
662 page boundary in userland and which has to end at a page boundary.
663 The memory in userland is ok to be fragmented into various different
664 vmas. It is okay to mmap() and munmap() stuff in this slot after
665 doing this call at any time */
/* slot must start at guest address zero */
670 if (mem->guest_phys_addr)
/* user space start address must be page aligned */
673 if (mem->userspace_addr & (PAGE_SIZE - 1))
/* size must be a whole number of pages */
676 if (mem->memory_size & (PAGE_SIZE - 1))
679 kvm->arch.guest_origin = mem->userspace_addr;
680 kvm->arch.guest_memsize = mem->memory_size;
682 /* FIXME: we do want to interrupt running CPUs and update their memory
683 configuration now to avoid race conditions. But hey, changing the
684 memory layout while virtual CPUs are running is usually bad
685 programming practice. */
690 void kvm_arch_flush_shadow(struct kvm *kvm)
694 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
/*
 * Module entry point: registers with the common KVM code, passing no
 * opaque data, the size of struct kvm_vcpu and this module as owner, and
 * returns kvm_init()'s result.
 */
699 static int __init kvm_s390_init(void)
701 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
704 static void __exit kvm_s390_exit(void)
709 module_init(kvm_s390_init);
710 module_exit(kvm_s390_exit);